"vscode:/vscode.git/clone" did not exist on "6adb4b1cb57fe1f1a9430e8b6f97e4c710caf883"
Commit 70fe454f authored by Umang Yadav's avatar Umang Yadav
Browse files

Merge branch 'develop' into resnet50_partition

parents ea0b8059 f47e0b5b
...@@ -76,7 +76,7 @@ include(ROCMSetupVersion) ...@@ -76,7 +76,7 @@ include(ROCMSetupVersion)
option(BUILD_DEV "Build for development purpose only" OFF) option(BUILD_DEV "Build for development purpose only" OFF)
rocm_setup_version(VERSION 2.8.0) rocm_setup_version(VERSION 2.8.0)
set(MIGRAPHX_SO_VERSION ${PROJECT_VERSION_MAJOR}.${PROJECT_VERSION_MINOR}.${PROJECT_VERSION_PATCH}) set(MIGRAPHX_SO_VERSION ${PROJECT_VERSION_MAJOR}.${PROJECT_VERSION_MINOR})
option( BUILD_SHARED_LIBS "Build as a shared library" ON ) option( BUILD_SHARED_LIBS "Build as a shared library" ON )
......
...@@ -35,7 +35,7 @@ fastjsonschema==2.16.3 ...@@ -35,7 +35,7 @@ fastjsonschema==2.16.3
# via rocm-docs-core # via rocm-docs-core
gitdb==4.0.10 gitdb==4.0.10
# via gitpython # via gitpython
gitpython==3.1.32 gitpython==3.1.37
# via rocm-docs-core # via rocm-docs-core
idna==3.4 idna==3.4
# via requests # via requests
...@@ -87,7 +87,7 @@ requests==2.28.2 ...@@ -87,7 +87,7 @@ requests==2.28.2
# via # via
# pygithub # pygithub
# sphinx # sphinx
rocm-docs-core==0.24.2 rocm-docs-core==0.26.0
# via -r requirements.in # via -r requirements.in
smmap==5.0.0 smmap==5.0.0
# via gitdb # via gitdb
...@@ -130,7 +130,7 @@ sphinxcontrib-serializinghtml==1.1.5 ...@@ -130,7 +130,7 @@ sphinxcontrib-serializinghtml==1.1.5
# via sphinx # via sphinx
typing-extensions==4.5.0 typing-extensions==4.5.0
# via pydata-sphinx-theme # via pydata-sphinx-theme
urllib3==1.26.15 urllib3==1.26.18
# via requests # via requests
wrapt==1.15.0 wrapt==1.15.0
# via deprecated # via deprecated
...@@ -46,7 +46,7 @@ struct MIGRAPHX_EXPORT argument : raw_data<argument> ...@@ -46,7 +46,7 @@ struct MIGRAPHX_EXPORT argument : raw_data<argument>
{ {
argument() = default; argument() = default;
argument(const shape& s); explicit argument(const shape& s);
template <class F, MIGRAPHX_REQUIRES(std::is_pointer<decltype(std::declval<F>()())>{})> template <class F, MIGRAPHX_REQUIRES(std::is_pointer<decltype(std::declval<F>()())>{})>
argument(shape s, F d) argument(shape s, F d)
......
...@@ -88,13 +88,13 @@ struct allocate ...@@ -88,13 +88,13 @@ struct allocate
{ {
if(args.empty()) if(args.empty())
{ {
return {output_shape}; return argument{output_shape};
} }
else else
{ {
std::vector<std::size_t> output_dims(output_shape.ndim()); std::vector<std::size_t> output_dims(output_shape.ndim());
args.at(0).visit([&](auto a) { output_dims.assign(a.begin(), a.end()); }); args.at(0).visit([&](auto a) { output_dims.assign(a.begin(), a.end()); });
return {shape{buf_type, output_dims}}; return argument{shape{buf_type, output_dims}};
} }
} }
}; };
......
...@@ -411,7 +411,7 @@ struct pooling ...@@ -411,7 +411,7 @@ struct pooling
// for dynamic GlobalPooling, there's no padding // for dynamic GlobalPooling, there's no padding
kernel_dims.insert(kernel_dims.end(), input_lens.begin() + 2, input_lens.end()); kernel_dims.insert(kernel_dims.end(), input_lens.begin() + 2, input_lens.end());
output_shape = dyn_out.computed_shape; output_shape = dyn_out.computed_shape;
result = dyn_out.computed_shape; result = argument{dyn_out.computed_shape};
} }
else if((padding_mode != op::padding_mode_t::default_)) else if((padding_mode != op::padding_mode_t::default_))
{ {
...@@ -439,7 +439,7 @@ struct pooling ...@@ -439,7 +439,7 @@ struct pooling
{ {
kernel_dims = this->lengths; kernel_dims = this->lengths;
output_shape = dyn_out.computed_shape; output_shape = dyn_out.computed_shape;
result = dyn_out.computed_shape; result = argument{dyn_out.computed_shape};
} }
// Perform the computation and populate result // Perform the computation and populate result
......
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include <migraphx/onnx/op_parser.hpp>
#include <migraphx/ranges.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/make_op.hpp>
#include <migraphx/onnx/checks.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace onnx {
// Parses the ONNX MeanVarianceNormalization operator into the instruction
// sequence (X - mean(X)) / (stddev(X) + epsilon), reduced over the requested
// axes. Matches ONNX semantics: default axes are {0, 2, 3} for NCHW input.
struct parse_mean_variance_normalization : op_parser<parse_mean_variance_normalization>
{
    std::vector<op_desc> operators() const { return {{"MeanVarianceNormalization"}}; }

    instruction_ref parse(const op_desc& /*opd*/,
                          const onnx_parser& /*parser*/,
                          onnx_parser::node_info info,
                          std::vector<instruction_ref> args) const
    {
        auto&& input = args.front();
        auto rank    = input->get_shape().ndim();
        // ONNX default: normalize across batch and spatial dims of an NCHW tensor.
        std::vector<int64_t> axes{0, 2, 3};

        if(contains(info.attributes, "axes"))
        {
            const auto& axes_attr = info.attributes["axes"].ints();
            axes.assign(axes_attr.begin(), axes_attr.end());
        }
        else if(rank != 4)
        {
            MIGRAPHX_THROW(
                "Input tensor needs to be rank 4 when axes is not specified. Instead it is rank " +
                std::to_string(rank));
        }

        if(axes.size() != rank - 1)
        {
            MIGRAPHX_THROW("Length of axes array needs to be equal to input tensor rank - 1");
        }

        // E[X] and (E[X])^2
        auto mean         = info.add_instruction(make_op("reduce_mean", {{"axes", axes}}), input);
        auto mean_squared = info.add_common_op("mul", mean, mean);
        // E[X^2]
        auto squared      = info.add_common_op("mul", input, input);
        auto squared_mean = info.add_instruction(make_op("reduce_mean", {{"axes", axes}}), squared);
        // Var(X) = E[X^2] - (E[X])^2; stddev = sqrt(Var(X))
        auto variance = info.add_common_op("sub", squared_mean, mean_squared);
        auto stddev   = info.add_common_op("sqrt", variance);
        auto centered = info.add_common_op("sub", input, mean);
        // fp16 cannot represent 1e-9 (smallest half subnormal is ~6e-8),
        // so a larger epsilon is used for half precision.
        const auto dtype = input->get_shape().type();
        auto epsilon = info.add_literal({dtype, {dtype == shape::half_type ? 1e-7 : 1e-9}});
        auto denominator = info.add_common_op("add", stddev, epsilon);
        return info.add_common_op("div", centered, denominator);
    }
};
} // namespace onnx
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
...@@ -56,9 +56,6 @@ struct parse_trilu : op_parser<parse_trilu> ...@@ -56,9 +56,6 @@ struct parse_trilu : op_parser<parse_trilu>
k = arg_k.at<int>(); k = arg_k.at<int>();
} }
if(k < 0)
MIGRAPHX_THROW("PARSE_TRILU: negative k values not supported");
if(contains(info.attributes, "upper")) if(contains(info.attributes, "upper"))
{ {
upper = static_cast<bool>(info.attributes.at("upper").i()); upper = static_cast<bool>(info.attributes.at("upper").i());
...@@ -69,9 +66,12 @@ struct parse_trilu : op_parser<parse_trilu> ...@@ -69,9 +66,12 @@ struct parse_trilu : op_parser<parse_trilu>
// when creating the mask, if upper == 1, // when creating the mask, if upper == 1,
// the inner triangle will have values set to 0 // the inner triangle will have values set to 0
std::vector<bool> mask_mat(num_rows * num_cols, upper); std::vector<bool> mask_mat(num_rows * num_cols, upper);
// if upper == 0, kth diagonal must also be masked
if(not upper)
k++;
for(size_t i = 0; i < num_rows; i++) for(size_t i = 0; i < num_rows; i++)
{ {
for(size_t j = 0; j < std::min(k, static_cast<int>(num_cols)); j++) for(int j = 0; j < std::min(k, static_cast<int>(num_cols)); j++)
{ {
mask_mat[i * num_cols + j] = not upper; mask_mat[i * num_cols + j] = not upper;
} }
......
...@@ -33,6 +33,8 @@ ...@@ -33,6 +33,8 @@
namespace migraphx { namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS { inline namespace MIGRAPHX_INLINE_NS {
MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_ENABLE_CK_WORKAROUNDS);
void apply_quantizelinear(module& m, instruction_ref ins) void apply_quantizelinear(module& m, instruction_ref ins)
{ {
assert(ins->name() == "quantizelinear"); assert(ins->name() == "quantizelinear");
...@@ -62,9 +64,22 @@ void apply_quantizelinear(module& m, instruction_ref ins) ...@@ -62,9 +64,22 @@ void apply_quantizelinear(module& m, instruction_ref ins)
max_quant = qt.max(); max_quant = qt.max();
min_quant = qt.min(); min_quant = qt.min();
}); });
auto s = add_zero_point->get_shape(); auto s = add_zero_point->get_shape();
auto min_arg = m.add_literal(literal{shape{s.type()}, {min_quant}}); instruction_ref min_arg;
auto max_arg = m.add_literal(literal{shape{s.type()}, {max_quant}}); instruction_ref max_arg;
if(enabled(MIGRAPHX_ENABLE_CK_WORKAROUNDS{}))
{
std::vector<int> min_data(s.elements(), min_quant);
std::vector<int> max_data(s.elements(), max_quant);
min_arg = m.add_literal(literal(s, min_data));
max_arg = m.add_literal(literal(s, max_data));
}
else
{
min_arg = m.add_literal(literal{shape{s.type()}, {min_quant}});
max_arg = m.add_literal(literal{shape{s.type()}, {max_quant}});
}
auto saturate = insert_common_op(m, ins, make_op("clip"), {add_zero_point, min_arg, max_arg}); auto saturate = insert_common_op(m, ins, make_op("clip"), {add_zero_point, min_arg, max_arg});
m.replace_instruction( m.replace_instruction(
ins, make_op("convert", {{"target_type", ins->get_shape().type()}}), saturate); ins, make_op("convert", {{"target_type", ins->get_shape().type()}}), saturate);
......
...@@ -139,6 +139,12 @@ void hip_compile_options::set_launch_params( ...@@ -139,6 +139,12 @@ void hip_compile_options::set_launch_params(
global = compute_global(local); global = compute_global(local);
} }
static bool hip_accept_non_uniform_wg()
{
static bool non_uniform_wg = hip_has_flags({"-fno-offload-uniform-block"});
return non_uniform_wg;
}
std::function<std::size_t(std::size_t local)> std::function<std::size_t(std::size_t local)>
compute_global_for(context& ctx, std::size_t n, std::size_t over) compute_global_for(context& ctx, std::size_t n, std::size_t over)
{ {
...@@ -146,13 +152,14 @@ compute_global_for(context& ctx, std::size_t n, std::size_t over) ...@@ -146,13 +152,14 @@ compute_global_for(context& ctx, std::size_t n, std::size_t over)
std::size_t max_global = ctx.get_current_device().get_cu_count() * std::size_t max_global = ctx.get_current_device().get_cu_count() *
ctx.get_current_device().get_max_workitems_per_cu(); ctx.get_current_device().get_max_workitems_per_cu();
return [n, over, max_global](std::size_t local) { return [n, over, max_global](std::size_t local) {
// hip require global workitems multiple of local workitems. It may degrade performance. std::size_t num_elements = n;
// [TODO]: consider adding "fno-hip-uniform-block" flag when it becomes available. if(not hip_accept_non_uniform_wg())
// https://reviews.llvm.org/D155213 {
std::size_t num_elements = ((n + local - 1) / local) * local; num_elements = (1 + (n - 1) / local) * local;
std::size_t groups = (num_elements + local - 1) / local; }
std::size_t max_blocks = max_global / local; std::size_t groups = 1 + (num_elements - 1) / local;
std::size_t nglobal = std::min(max_blocks * over, groups) * local; std::size_t max_blocks = max_global / local;
std::size_t nglobal = std::min(max_blocks * over, groups) * local;
return std::min(nglobal, num_elements); return std::min(nglobal, num_elements);
}; };
} }
...@@ -183,6 +190,11 @@ operation compile_hip_code_object(const std::string& content, hip_compile_option ...@@ -183,6 +190,11 @@ operation compile_hip_code_object(const std::string& content, hip_compile_option
generate_args_hpp(options.virtual_inputs.empty() ? options.inputs : options.virtual_inputs); generate_args_hpp(options.virtual_inputs.empty() ? options.inputs : options.virtual_inputs);
srcs.emplace_back("args.hpp", args_hpp); srcs.emplace_back("args.hpp", args_hpp);
if(options.global % options.local != 0 and hip_accept_non_uniform_wg())
options.params += " -fno-offload-uniform-block";
else
assert(options.global % options.local == 0);
options.params += " -DMIGRAPHX_NGLOBAL=" + std::to_string(options.global); options.params += " -DMIGRAPHX_NGLOBAL=" + std::to_string(options.global);
options.params += " -DMIGRAPHX_NLOCAL=" + std::to_string(options.local); options.params += " -DMIGRAPHX_NLOCAL=" + std::to_string(options.local);
options.params += " " + join_strings(compiler_warnings(), " "); options.params += " " + join_strings(compiler_warnings(), " ");
......
...@@ -26,6 +26,7 @@ ...@@ -26,6 +26,7 @@
#include <migraphx/matcher.hpp> #include <migraphx/matcher.hpp>
#include <migraphx/pass_manager.hpp> #include <migraphx/pass_manager.hpp>
#include <migraphx/register_op.hpp> #include <migraphx/register_op.hpp>
#include <migraphx/gpu/device_name.hpp>
namespace migraphx { namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS { inline namespace MIGRAPHX_INLINE_NS {
...@@ -92,6 +93,8 @@ MIGRAPHX_PRED_MATCHER(is_ck_gemm, instruction_ref ins) ...@@ -92,6 +93,8 @@ MIGRAPHX_PRED_MATCHER(is_ck_gemm, instruction_ref ins)
auto m = a.lens()[a.lens().size() - 2]; auto m = a.lens()[a.lens().size() - 2];
auto n = b.lens().back(); auto n = b.lens().back();
auto k = a.lens().back(); auto k = a.lens().back();
auto batch_size = std::accumulate(
a.lens().rbegin() + 2, a.lens().rend(), std::size_t{1}, std::multiplies<std::size_t>());
// Integer gemms must be divisible by 4 in ck // Integer gemms must be divisible by 4 in ck
if(contains({shape::int8_type, shape::int32_type}, ins->get_shape().type())) if(contains({shape::int8_type, shape::int32_type}, ins->get_shape().type()))
{ {
...@@ -102,9 +105,17 @@ MIGRAPHX_PRED_MATCHER(is_ck_gemm, instruction_ref ins) ...@@ -102,9 +105,17 @@ MIGRAPHX_PRED_MATCHER(is_ck_gemm, instruction_ref ins)
if(k % 4 != 0) if(k % 4 != 0)
return false; return false;
} }
// Skipping GEMMs with a K dimension greater than 2048 is a course-grained strategy auto device_name = trim(split_string(get_device_name(), ':').front());
// to avoid poor-performing GEMM kernels from CK if(device_name == "gfx940")
// To-do: Investigate a more precise strategy {
if(ins->get_shape().type() == shape::half_type)
{
if(batch_size >= 64)
return m < 2048 or k <= 64 or n <= 384 or n >= 2048;
return true;
}
return true;
}
return k <= 2048; return k <= 2048;
} }
...@@ -140,6 +151,10 @@ struct find_ck_gemm_pointwise ...@@ -140,6 +151,10 @@ struct find_ck_gemm_pointwise
return not input->inputs().empty() and input->inputs().front()->name() == "capture"; return not input->inputs().empty() and input->inputs().front()->name() == "capture";
})) }))
return; return;
if(std::any_of(ins->inputs().begin(), ins->inputs().end(), [](auto input) {
return not input->inputs().empty() and input->inputs().front()->name() == "capture";
}))
return;
assert(gemm_it != inputs.end()); assert(gemm_it != inputs.end());
if(gemm_idx != 0) if(gemm_idx != 0)
{ {
......
...@@ -199,9 +199,9 @@ struct miopen_convolution ...@@ -199,9 +199,9 @@ struct miopen_convolution
// MIOpen has APIs to pass pre-allocated buffers starting from rocm-5.6 // MIOpen has APIs to pass pre-allocated buffers starting from rocm-5.6
preallocate = true; preallocate = true;
#endif #endif
auto x = preallocate ? to_gpu(generate_argument(x_shape)) : inputs[0]; auto x = preallocate ? to_gpu(generate_argument(x_shape)) : argument{inputs[0]};
auto w = preallocate ? to_gpu(generate_argument(w_shape)) : inputs[1]; auto w = preallocate ? to_gpu(generate_argument(w_shape)) : argument{inputs[1]};
auto y = preallocate ? allocate_gpu(output_shape) : inputs[2]; auto y = preallocate ? allocate_gpu(output_shape) : argument{inputs[2]};
auto workspace = auto workspace =
preallocate ? allocate_gpu(workspace_shape) : migraphx::argument(workspace_shape); preallocate ? allocate_gpu(workspace_shape) : migraphx::argument(workspace_shape);
......
...@@ -31,6 +31,14 @@ ...@@ -31,6 +31,14 @@
#include <migraphx/kernels/debug.hpp> #include <migraphx/kernels/debug.hpp>
#include <migraphx/kernels/functional.hpp> #include <migraphx/kernels/functional.hpp>
#ifdef __clang__
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wreserved-identifier"
extern "C" __device__ size_t __ockl_get_enqueued_local_size(uint); // NOLINT
extern "C" __device__ size_t __ockl_get_local_size(uint); // NOLINT
#pragma clang diagnostic pop
#endif
namespace migraphx { namespace migraphx {
#if defined(MIGRAPHX_NGLOBAL) && defined(MIGRAPHX_NLOCAL) #if defined(MIGRAPHX_NGLOBAL) && defined(MIGRAPHX_NLOCAL)
...@@ -45,43 +53,37 @@ inline __device__ __attribute__((const)) index_int compute_global_size() ...@@ -45,43 +53,37 @@ inline __device__ __attribute__((const)) index_int compute_global_size()
// This actualy works even when global is not divisible by local size. // This actualy works even when global is not divisible by local size.
// This doesnt actually do a multiplicatiosn. Instead it calls a device // This doesnt actually do a multiplicatiosn. Instead it calls a device
// function to get the global size, which is why it works. // function to get the global size, which is why it works.
return blockDim.x * gridDim.x; // NOLINT return blockDim.x * gridDim.x; // NOLINT
#endif #endif
} }
// We cant just use blockDim.x to get the local size since its broken on hip #ifdef MIGRAPHX_NGROUP
// when global is not divisible by local size. In this case, we calulate the // If global is divisible by local then local can be a const
// size for the last group. #if(MIGRAPHX_NGLOBAL % MIGRAPHX_NLOCAL == 0) || (MIGRAPHX_NGROUP == 1)
#define MIGRAPHX_HAS_CONST_LOCAL 1
#endif
#endif
inline __device__ __attribute__((const)) index_int compute_local_size() inline __device__ __attribute__((const)) index_int compute_local_size()
{ {
#ifdef MIGRAPHX_NLOCAL #ifdef MIGRAPHX_HAS_CONST_LOCAL
const auto nlocal = MIGRAPHX_NLOCAL; return MIGRAPHX_NLOCAL;
#else
const auto nlocal = blockDim.x; // NOLINT
#endif
#ifdef MIGRAPHX_NGROUP
const auto ngroup = MIGRAPHX_NGROUP;
#else #else
const auto ngroup = gridDim.x; // NOLINT // Returns block size. For the non-uniform block it returns the size of the non-uniform block.
return __ockl_get_local_size(0); // NOLINT
#endif #endif
const auto group_id = blockIdx.x; // NOLINT
const auto nglobal = compute_global_size();
if(group_id == ngroup - 1)
{
return 1 + (nglobal - 1) % nlocal;
}
else
{
return nlocal; // NOLINT
}
} }
#ifdef MIGRAPHX_NGROUP inline __device__ __attribute__((const)) index_int compute_max_local_size()
// If global is divisible by local then local can be a const {
#if(MIGRAPHX_NGLOBAL % MIGRAPHX_NLOCAL == 0) || (MIGRAPHX_NGROUP == 1) #ifdef MIGRAPHX_LOCAL
#define MIGRAPHX_HAS_CONST_LOCAL 1 return MIGRAPHX_NLOCAL;
#endif #else
// Returns the block size. When workgrop has non-uniform block, this returns size of the uniform
// block.
return __ockl_get_enqueued_local_size(0); // NOLINT
#endif #endif
}
struct index struct index
{ {
...@@ -126,8 +128,8 @@ struct index ...@@ -126,8 +128,8 @@ struct index
#else #else
__device__ index_int max_nlocal() const __device__ index_int max_nlocal() const
{ {
MIGRAPHX_ASSERT(blockDim.x > 0); MIGRAPHX_ASSERT(compute_max_local_size() > 0);
return blockDim.x; return compute_max_local_size();
} }
#endif #endif
...@@ -249,7 +251,8 @@ struct index ...@@ -249,7 +251,8 @@ struct index
#endif #endif
inline __device__ __attribute__((const)) index make_index() inline __device__ __attribute__((const)) index make_index()
{ {
return index{blockIdx.x * blockDim.x + threadIdx.x, threadIdx.x, blockIdx.x}; // NOLINT return index{
blockIdx.x * compute_max_local_size() + threadIdx.x, threadIdx.x, blockIdx.x}; // NOLINT
} }
} // namespace migraphx } // namespace migraphx
......
...@@ -55,7 +55,7 @@ struct allocate ...@@ -55,7 +55,7 @@ struct allocate
const migraphx::shape& output_shape, const migraphx::shape& output_shape,
const std::vector<migraphx::argument>&) const const std::vector<migraphx::argument>&) const
{ {
return {output_shape}; return migraphx::argument{output_shape};
} }
}; };
......
...@@ -60,7 +60,7 @@ struct concat ...@@ -60,7 +60,7 @@ struct concat
const migraphx::shape& output_shape, const migraphx::shape& output_shape,
const std::vector<migraphx::argument>&) const const std::vector<migraphx::argument>&) const
{ {
return {output_shape}; return migraphx::argument{output_shape};
} }
}; };
...@@ -104,7 +104,7 @@ struct allocate ...@@ -104,7 +104,7 @@ struct allocate
const migraphx::shape& output_shape, const migraphx::shape& output_shape,
const std::vector<migraphx::argument>&) const const std::vector<migraphx::argument>&) const
{ {
return {output_shape}; return migraphx::argument{output_shape};
} }
}; };
......
...@@ -55,7 +55,7 @@ struct allocate ...@@ -55,7 +55,7 @@ struct allocate
const migraphx::shape& output_shape, const migraphx::shape& output_shape,
const std::vector<migraphx::argument>&) const const std::vector<migraphx::argument>&) const
{ {
return {output_shape}; return migraphx::argument{output_shape};
} }
}; };
......
...@@ -57,7 +57,7 @@ struct normalize_test_op ...@@ -57,7 +57,7 @@ struct normalize_test_op
const migraphx::shape& output_shape, const migraphx::shape& output_shape,
const std::vector<migraphx::argument>&) const const std::vector<migraphx::argument>&) const
{ {
return {output_shape}; return migraphx::argument{output_shape};
} }
}; };
......
6d7bc2a097a1a08541cd0d4628831c79ab8092d5 635d3faa3b3908d2806d009dc6872152cfcfcdda
...@@ -4681,6 +4681,77 @@ def mean_integral_test(): ...@@ -4681,6 +4681,77 @@ def mean_integral_test():
return ([node], data, [mean]) return ([node], data, [mean])
def mvn_default_axes_test_base(dims, type=TensorProto.FLOAT):
    # Build a MeanVarianceNormalization node that relies on the operator's
    # default axes (no `axes` attribute is attached).
    data, out = (helper.make_tensor_value_info(name, type, dims)
                 for name in ("data", "out"))
    mvn_node = helper.make_node("MeanVarianceNormalization",
                                inputs=["data"],
                                outputs=["out"])
    return ([mvn_node], [data], [out])
@onnx_test()
def mvn_default_axes_test():
    # Rank-4 float32 input; MVN uses its default axes.
    dims = [2, 2, 2, 2]
    return mvn_default_axes_test_base(dims)
@onnx_test()
def mvn_default_axes_fp16_test():
    # Rank-4 float16 input; MVN uses its default axes.
    dims = [2, 2, 2, 2]
    return mvn_default_axes_test_base(dims, TensorProto.FLOAT16)
@onnx_test()
def mvn_default_axes_rank_too_small_test():
    # Rank-3 input with no `axes` attribute: expected to be rejected.
    return mvn_default_axes_test_base([2] * 3)
@onnx_test()
def mvn_default_axes_rank_too_big_test():
    # Rank-5 input with no `axes` attribute: expected to be rejected.
    return mvn_default_axes_test_base([2] * 5)
def mvn_n_rank_test_base(axes, dims, type=TensorProto.FLOAT):
    # Build a MeanVarianceNormalization node with an explicit `axes` attribute.
    data, out = (helper.make_tensor_value_info(name, type, dims)
                 for name in ("data", "out"))
    mvn_node = helper.make_node("MeanVarianceNormalization",
                                inputs=["data"],
                                outputs=["out"],
                                axes=axes)
    return ([mvn_node], [data], [out])
@onnx_test()
def mvn_rank_2_test():
    # Rank-2 float32 input normalized over axis 1.
    return mvn_n_rank_test_base(axes=[1], dims=[2, 2])
@onnx_test()
def mvn_rank_2_fp16_test():
    # Rank-2 float16 input normalized over axis 1.
    dims = [2, 2]
    return mvn_n_rank_test_base([1], dims, TensorProto.FLOAT16)
@onnx_test()
def mvn_rank_3_test():
    # Rank-3 float32 input normalized over axes 0 and 1.
    return mvn_n_rank_test_base(axes=[0, 1], dims=[2, 2, 2])
@onnx_test()
def mvn_rank_3_fp16_test():
    # Rank-3 float16 input normalized over axes 0 and 1.
    dims = [2, 2, 2]
    return mvn_n_rank_test_base([0, 1], dims, TensorProto.FLOAT16)
@onnx_test()
def mvn_axes_rank_too_small_test():
    # axes list longer than rank - 1 for this input: expected to be rejected.
    return mvn_n_rank_test_base([0, 1, 2], [2, 2, 2])
@onnx_test()
def mvn_axes_rank_too_big_test():
    # axes list shorter than rank - 1 for this input: expected to be rejected.
    return mvn_n_rank_test_base([0], [2, 2, 2])
@onnx_test() @onnx_test()
def min_test(): def min_test():
a = helper.make_tensor_value_info('0', TensorProto.FLOAT, [3]) a = helper.make_tensor_value_info('0', TensorProto.FLOAT, [3])
...@@ -8502,7 +8573,7 @@ def transpose_gather_test(): ...@@ -8502,7 +8573,7 @@ def transpose_gather_test():
@onnx_test() @onnx_test()
def trilu_test(): def triu_test():
x = helper.make_tensor_value_info('x', TensorProto.FLOAT, [3, 4]) x = helper.make_tensor_value_info('x', TensorProto.FLOAT, [3, 4])
y = helper.make_tensor_value_info('y', TensorProto.FLOAT, [3, 4]) y = helper.make_tensor_value_info('y', TensorProto.FLOAT, [3, 4])
...@@ -8515,7 +8586,7 @@ def trilu_test(): ...@@ -8515,7 +8586,7 @@ def trilu_test():
@onnx_test() @onnx_test()
def trilu_batch_diff_k_test(): def triu_batch_diff_k_test():
x = helper.make_tensor_value_info('x', TensorProto.FLOAT, [2, 2, 3]) x = helper.make_tensor_value_info('x', TensorProto.FLOAT, [2, 2, 3])
k = np.array([2]) k = np.array([2])
y = helper.make_tensor_value_info('y', TensorProto.FLOAT, [2, 2, 3]) y = helper.make_tensor_value_info('y', TensorProto.FLOAT, [2, 2, 3])
...@@ -8533,7 +8604,24 @@ def trilu_batch_diff_k_test(): ...@@ -8533,7 +8604,24 @@ def trilu_batch_diff_k_test():
@onnx_test() @onnx_test()
def trilu_lower_test(): def tril_batch_diff_k_test():
x = helper.make_tensor_value_info('x', TensorProto.FLOAT, [2, 2, 3])
k = np.array([2])
y = helper.make_tensor_value_info('y', TensorProto.FLOAT, [2, 2, 3])
k_tensor = helper.make_tensor(name='k',
data_type=TensorProto.INT64,
dims=k.shape,
vals=k.astype(np.int64))
node = onnx.helper.make_node('Trilu',
inputs=['x', 'k'],
outputs=['y'],
upper=0)
return ([node], [x], [y], [k_tensor])
@onnx_test()
def tril_test():
x = helper.make_tensor_value_info('x', TensorProto.FLOAT, [3, 4]) x = helper.make_tensor_value_info('x', TensorProto.FLOAT, [3, 4])
y = helper.make_tensor_value_info('y', TensorProto.FLOAT, [3, 4]) y = helper.make_tensor_value_info('y', TensorProto.FLOAT, [3, 4])
...@@ -8542,7 +8630,7 @@ def trilu_lower_test(): ...@@ -8542,7 +8630,7 @@ def trilu_lower_test():
@onnx_test() @onnx_test()
def trilu_neg_k_test(): def triu_neg_k_test():
x = helper.make_tensor_value_info('x', TensorProto.FLOAT, [3, 4]) x = helper.make_tensor_value_info('x', TensorProto.FLOAT, [3, 4])
k = np.array([-1]) k = np.array([-1])
y = helper.make_tensor_value_info('y', TensorProto.FLOAT, [3, 4]) y = helper.make_tensor_value_info('y', TensorProto.FLOAT, [3, 4])
...@@ -8556,7 +8644,23 @@ def trilu_neg_k_test(): ...@@ -8556,7 +8644,23 @@ def trilu_neg_k_test():
@onnx_test() @onnx_test()
def trilu_out_k_test(): def tril_neg_k_test():
x = helper.make_tensor_value_info('x', TensorProto.FLOAT, [3, 4])
k = np.array([-1])
y = helper.make_tensor_value_info('y', TensorProto.FLOAT, [3, 4])
k_tensor = helper.make_tensor(name='k',
data_type=TensorProto.INT64,
dims=k.shape,
vals=k.astype(np.int64))
node = onnx.helper.make_node('Trilu',
inputs=['x', 'k'],
outputs=['y'],
upper=0)
return ([node], [x], [y], [k_tensor])
@onnx_test()
def triu_out_k_test():
x = helper.make_tensor_value_info('x', TensorProto.FLOAT, [3, 4]) x = helper.make_tensor_value_info('x', TensorProto.FLOAT, [3, 4])
k = np.array([5]) k = np.array([5])
y = helper.make_tensor_value_info('y', TensorProto.FLOAT, [3, 4]) y = helper.make_tensor_value_info('y', TensorProto.FLOAT, [3, 4])
...@@ -8570,7 +8674,23 @@ def trilu_out_k_test(): ...@@ -8570,7 +8674,23 @@ def trilu_out_k_test():
@onnx_test() @onnx_test()
def trilu_row_one_test(): def tril_out_k_test():
x = helper.make_tensor_value_info('x', TensorProto.FLOAT, [3, 4])
k = np.array([5])
y = helper.make_tensor_value_info('y', TensorProto.FLOAT, [3, 4])
k_tensor = helper.make_tensor(name='k',
data_type=TensorProto.INT64,
dims=k.shape,
vals=k.astype(np.int64))
node = onnx.helper.make_node('Trilu',
inputs=['x', 'k'],
outputs=['y'],
upper=0)
return ([node], [x], [y], [k_tensor])
@onnx_test()
def triu_row_one_test():
x = helper.make_tensor_value_info('x', TensorProto.FLOAT, [1, 4]) x = helper.make_tensor_value_info('x', TensorProto.FLOAT, [1, 4])
k = np.array([1]) k = np.array([1])
y = helper.make_tensor_value_info('y', TensorProto.FLOAT, [1, 4]) y = helper.make_tensor_value_info('y', TensorProto.FLOAT, [1, 4])
...@@ -8587,6 +8707,23 @@ def trilu_row_one_test(): ...@@ -8587,6 +8707,23 @@ def trilu_row_one_test():
return ([node], [x], [y], [k_tensor]) return ([node], [x], [y], [k_tensor])
@onnx_test()
def tril_row_one_test():
    # Lower-triangular Trilu (upper=0) on a single-row matrix with k=1
    # supplied as an initializer tensor.
    shape = [1, 4]
    x = helper.make_tensor_value_info('x', TensorProto.FLOAT, shape)
    y = helper.make_tensor_value_info('y', TensorProto.FLOAT, shape)
    k_vals = np.array([1])
    k_tensor = helper.make_tensor(name='k',
                                  data_type=TensorProto.INT64,
                                  dims=k_vals.shape,
                                  vals=k_vals.astype(np.int64))
    node = onnx.helper.make_node('Trilu',
                                 inputs=['x', 'k'],
                                 outputs=['y'],
                                 upper=0)
    return ([node], [x], [y], [k_tensor])
@onnx_test() @onnx_test()
def undefined_test(): def undefined_test():
x = helper.make_tensor_value_info('0', TensorProto.FLOAT, [2, 3, 4, 5]) x = helper.make_tensor_value_info('0', TensorProto.FLOAT, [2, 3, 4, 5])
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment