"testing/vscode:/vscode.git/clone" did not exist on "225aca612e7e175a2c9ea4f73ba4140005a29759"
Commit 3a474fca authored by Khalique Ahmed

Merge branch 'develop' of https://github.com/ROCmSoftwarePlatform/AMDMIGraphX into mi100_opts

parents d9568511 0b7672d7
@@ -4,7 +4,7 @@ CheckOptions:
   - key: bugprone-unused-return-value.CheckedFunctions
     value: '::std::async;::std::launder;::std::remove;::std::remove_if;::std::unique;::std::unique_ptr::release;::std::basic_string::empty;::std::vector::empty;::std::find;::std::find_if;::std::find_if_not;::std::all_of;::std::any_of;::std::none_of;::std::count;::std::count_if;::std::mismatch;::std::find_end;::std::find_first_of;::std::adjacent_find;::std::search;::std::search_n;::std::nth_element;::std::lower_bound;::std::upper_bound;::std::binary_search;::std::equal_range;::std::max;::std::max_element;::std::min;::std::min_element;::std::minmax;::std::minmax_element;::std::equal;::std::lexicographical_compare;::std::accumulate;::std::inner_product'
   - key: cppcoreguidelines-macro-usage.AllowedRegexp
-    value: 'DEBUG|FALLTHROUGH|STRINGIZE|_HAS_|_THROW|_REQUIRES|_DECLARE_|_VISIT_|_REGISTER_|_GENERATE_|_DETAIL_|_TIDY_|_MANAGE_PTR|_MATCHER|DEVICE_SHARED'
+    value: 'DEBUG|FALLTHROUGH|STRINGIZE|_HAS_|_THROW|_REQUIRES|_DECLARE_|_VISIT_|_REGISTER_|_GENERATE_|_DETAIL_|_TIDY_|_MANAGE_PTR|_MATCHER|DEVICE_SHARED|_WORKAROUND_'
   - key: modernize-loop-convert.MinConfidence
     value: risky
   - key: modernize-loop-convert.NamingStyle
...
@@ -223,7 +223,7 @@ rocm_create_package(
     MAINTAINER "Paul Fultz II <paul.fultz@amd.com>"
     LDCONFIG
     PTH
-    DEPENDS miopen-hip rocblas hip-hcc half
+    DEPENDS miopen-hip rocblas hip-rocclr hip-base half
 )
 set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/lib)
...
@@ -124,8 +124,9 @@ def onnxnode(name, body) {
 rocmtest onnx: onnxnode('rocmtest') { cmake_build ->
     stage("Onnx runtime") {
         sh '''
+            apt install half
             ls -lR
-            dpkg -i --force-depends ./build/*.deb
+            dpkg -i ./build/*.deb
             cd /onnxruntime && ./build_and_test_onnxrt.sh
         '''
     }
...
@@ -129,6 +129,7 @@ register_migraphx_ops(
     min
     mul
     multibroadcast
+    multinomial
     neg
     outline
     pad
...
#ifndef MIGRAPHX_GUARD_OPERATORS_MULTINOMIAL_HPP
#define MIGRAPHX_GUARD_OPERATORS_MULTINOMIAL_HPP

#include <migraphx/operation.hpp>
#include <migraphx/check_shapes.hpp>
#include <migraphx/par_for.hpp>
#include <migraphx/ranges.hpp> // for contains
#include <algorithm>           // for std::upper_bound
#include <random>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace op {

struct multinomial
{
    shape::type_t dtype = shape::type_t::int32_type;

    template <class Self, class F>
    static auto reflect(Self& self, F f)
    {
        return pack(f(self.dtype, "dtype"));
    }

    std::string name() const { return "multinomial"; }

    shape compute_shape(std::vector<shape> inputs) const
    {
        check_shapes{inputs, *this}.has(2).only_dims(2);
        size_t sample_size = inputs.back().lens().back();

        if(not contains({shape::int32_type, shape::int64_type}, dtype))
            MIGRAPHX_THROW(
                "Multinomial: Invalid output type. Valid types are int32_type and int64_type.");

        return {dtype, {inputs.front().lens().front(), sample_size}};
    }

    // args[0]: running (unnormalized) per-row CDF of class probabilities
    // args[1]: one uniform draw in [0, 1) per output sample
    argument compute(const shape& output_shape, std::vector<argument> args) const
    {
        argument result{output_shape};
        size_t batch_size  = output_shape.lens().front();
        size_t class_size  = args[0].get_shape().lens().back();
        size_t sample_size = output_shape.lens().back();

        visit_all(args[0], args[1])([&](auto cdf, auto dist) {
            result.visit([&](auto output) {
                par_for(batch_size * sample_size, [&](auto i) {
                    auto idx       = args[1].get_shape().multi(i);
                    auto cdf_begin = cdf.begin() + (idx[0] * class_size);
                    auto cdf_end   = cdf_begin + class_size;
                    // Scale the draw by the row total (last CDF entry) instead of
                    // normalizing the CDF, then binary-search for the sampled class.
                    auto sample_iter =
                        std::upper_bound(cdf_begin, cdf_end, dist[i] * *(std::prev(cdf_end)));
                    output[i] = std::distance(cdf_begin, sample_iter);
                });
            });
        });

        return result;
    }
};

} // namespace op
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
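For reference, the sampling rule the compute function implements can be exercised on the host with the standard library alone. A minimal sketch (illustrative only; sample_class, cdf, and u are not names from the commit):

#include <algorithm>
#include <cassert>
#include <vector>

// cdf holds a running, unnormalized sum of class probabilities for one
// batch row; u is a uniform draw in [0, 1).
static int sample_class(const std::vector<float>& cdf, float u)
{
    // Scale u by the total mass (last CDF entry) rather than normalizing.
    auto it = std::upper_bound(cdf.begin(), cdf.end(), u * cdf.back());
    return static_cast<int>(std::distance(cdf.begin(), it));
}

int main()
{
    std::vector<float> cdf = {0.1f, 0.4f, 1.0f}; // class masses 0.1, 0.3, 0.6
    assert(sample_class(cdf, 0.05f) == 0);
    assert(sample_class(cdf, 0.25f) == 1);
    assert(sample_class(cdf, 0.90f) == 2);
}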
#include <migraphx/onnx/op_parser.hpp>
#include <migraphx/ranges.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/make_op.hpp>
#include <cmath> // for std::pow

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace onnx {

struct parse_depthtospace : op_parser<parse_depthtospace>
{
    std::vector<op_desc> operators() const { return {{"DepthToSpace"}}; }

    instruction_ref parse(const op_desc& /*opd*/,
                          const onnx_parser& /*parser*/,
                          const onnx_parser::node_info& info,
                          std::vector<instruction_ref> args) const
    {
        auto s = args[0]->get_shape();

        // mode attribute of DepthToSpace: either "DCR" or "CRD"
        auto mode = std::string("DCR");
        if(contains(info.attributes, "mode"))
        {
            mode = info.attributes.at("mode").s();
        }

        // blocksize attribute of DepthToSpace
        int blocksize = 0;
        if(contains(info.attributes, "blocksize"))
        {
            blocksize = info.attributes.at("blocksize").i();
        }
        if(blocksize < 1)
        {
            MIGRAPHX_THROW("DepthToSpace: blocksize is less than 1");
        }

        // calculate dimensions
        auto lens1 = s.lens();
        auto lens2 = s.lens();
        unsigned long divisor = std::pow(blocksize, 2);
        if((lens2[1] % divisor) == 0)
            lens2[1] = lens2[1] / divisor;
        else
            MIGRAPHX_THROW("DepthToSpace: channel count is not divisible by blocksize squared");
        lens1.push_back(lens1[2]);
        lens1.push_back(lens1[3]);
        lens2[2] = lens2[2] * blocksize;
        lens2[3] = lens2[3] * blocksize;
        lens1[2] = blocksize;

        std::vector<int64_t> perm;
        if(mode == "DCR")
        {
            lens1[3] = lens1[1] / divisor;
            lens1[1] = blocksize;
            perm     = {0, 3, 4, 1, 5, 2};
        }
        else if(mode == "CRD")
        {
            lens1[1] = lens1[1] / divisor;
            lens1[3] = blocksize;
            perm     = {0, 1, 4, 2, 5, 3};
        }
        else
            MIGRAPHX_THROW("DepthToSpace: mode attribute must be \"DCR\" or \"CRD\".");

        auto temp1 = info.add_instruction(make_op("reshape", {{"dims", lens1}}), args[0]);
        auto temp2 = info.add_instruction(make_op("transpose", {{"permutation", perm}}), temp1);
        return info.add_instruction(make_op("reshape", {{"dims", lens2}}),
                                    info.make_contiguous(temp2));
    }
};

} // namespace onnx
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
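A concrete shape trace clarifies the two reshapes: for a 1x8x2x2 input with blocksize 2 in DCR mode, lens1 is {1, 2, 2, 2, 2, 2} (N, b, b, C/b², H, W), the permutation {0, 3, 4, 1, 5, 2} yields (N, C/b², H, b, W, b), and the final reshape to lens2 = {1, 2, 4, 4} interleaves the block elements into the spatial dimensions. A minimal sketch of that bookkeeping (hypothetical standalone code, not part of the commit):

#include <cstdio>

int main()
{
    long n = 1, c = 8, h = 2, w = 2, b = 2; // example input dims and blocksize
    long div = b * b;
    // DCR: lens1 = {n, b, b, c/div, h, w}, permute {0, 3, 4, 1, 5, 2},
    // then reshape to lens2 = {n, c/div, h*b, w*b}
    std::printf("lens2: %ld %ld %ld %ld\n", n, c / div, h * b, w * b); // 1 2 4 4
    return 0;
}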
#include <migraphx/onnx/op_parser.hpp>
#include <migraphx/onnx/checks.hpp>
#include <migraphx/ranges.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/make_op.hpp>
#include <algorithm> // for std::generate
#include <chrono>    // for the default time-based seed
#include <random>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace onnx {

struct parse_multinomial : op_parser<parse_multinomial>
{
    std::vector<op_desc> operators() const { return {{"Multinomial"}}; }

    instruction_ref parse(const op_desc& /*opd*/,
                          const onnx_parser& /*parser*/,
                          const onnx_parser::node_info& info,
                          std::vector<instruction_ref> args) const
    {
        int dtype = 6; // ONNX default: int32
        if(contains(info.attributes, "dtype"))
            dtype = info.attributes.at("dtype").i();
        shape::type_t output_type = get_type(dtype);

        size_t sample_size = 1;
        if(contains(info.attributes, "sample_size"))
            sample_size = info.attributes.at("sample_size").i();

        float seed = static_cast<float>(
            std::chrono::high_resolution_clock::now().time_since_epoch().count());
        if(contains(info.attributes, "seed"))
            seed = info.attributes.at("seed").f();

        // Subtract the per-batch maximum log-probability, making the per-batch max 0
        auto maxes =
            info.add_instruction(migraphx::make_op("reduce_max", {{"axes", {1}}}), args[0]);
        auto mb_maxes = info.add_instruction(
            migraphx::make_op("multibroadcast", {{"out_lens", args[0]->get_shape().lens()}}),
            maxes);
        auto cdf = info.add_instruction(migraphx::make_op("sub"), args[0], mb_maxes);

        // Take the element-wise exponent to get probabilities in the range (0, 1]
        cdf = info.add_instruction(migraphx::make_op("exp"), cdf);

        // Compute the cumulative distribution function
        cdf = info.add_instruction(
            migraphx::make_op("prefix_scan_sum", {{"axis", 1}, {"exclusive", false}}), cdf);

        // Pre-compute the random distribution on the host
        std::mt19937 gen(seed);
        std::uniform_real_distribution<> dis(0.0, 1.0);
        size_t batch_size = args[0]->get_shape().lens().front();
        migraphx::shape dist_shape{migraphx::shape::float_type, {batch_size, sample_size}};

        std::vector<float> random_dist(batch_size * sample_size);
        std::generate(random_dist.begin(), random_dist.end(), [&]() { return dis(gen); });
        auto dist_lit = info.add_literal(migraphx::literal{dist_shape, random_dist});

        return info.add_instruction(
            migraphx::make_op("multinomial", {{"dtype", output_type}}), cdf, dist_lit);
    }
};

} // namespace onnx
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
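The reduce_max / sub / exp / prefix_scan_sum chain builds a numerically stable, unnormalized CDF; the explicit softmax division is unnecessary because the multinomial op scales each uniform draw by the row total. A host-side sketch of the same computation for one row (illustrative names, not part of the commit):

#include <algorithm>
#include <cmath>
#include <numeric>
#include <vector>

// Same math as the parser's instruction chain, for a single row of logits:
// subtract the max, exponentiate, then take a running sum.
std::vector<float> unnormalized_cdf(std::vector<float> logits)
{
    float m = *std::max_element(logits.begin(), logits.end());
    for(auto& x : logits)
        x = std::exp(x - m); // every term is now in (0, 1]
    std::partial_sum(logits.begin(), logits.end(), logits.begin());
    return logits; // the last element is the softmax denominator
}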
#include <migraphx/onnx/op_parser.hpp>
#include <migraphx/ranges.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/make_op.hpp>
#include <migraphx/onnx/checks.hpp>
#include <algorithm> // for std::generate
#include <chrono>    // for the default time-based seed
#include <random>
#include <set>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace onnx {

struct parse_randomnormal_ops : op_parser<parse_randomnormal_ops>
{
    const std::set<shape::type_t> valid_types = {
        shape::float_type, shape::half_type, shape::double_type};

    std::vector<op_desc> operators() const { return {{"RandomNormal"}, {"RandomNormalLike"}}; }

    instruction_ref parse(const op_desc& opd,
                          const onnx_parser& parser,
                          const onnx_parser::node_info& info,
                          std::vector<instruction_ref> args) const
    {
        int dtype      = 1; // ONNX default: float
        bool use_dtype = false;
        if(contains(info.attributes, "dtype"))
        {
            dtype     = info.attributes.at("dtype").i();
            use_dtype = true;
        }
        shape::type_t out_type = get_type(dtype);
        if(not contains(valid_types, out_type))
            MIGRAPHX_THROW(opd.op_name + ": invalid output type: " + std::to_string(dtype) +
                           ". Valid types are 1 (float), 10 (half), and 11 (double).");

        float mean = 0.0;
        if(contains(info.attributes, "mean"))
            mean = info.attributes.at("mean").f();

        float scale = 1.0;
        if(contains(info.attributes, "scale"))
            scale = info.attributes.at("scale").f();

        float seed = static_cast<float>(
            std::chrono::high_resolution_clock::now().time_since_epoch().count());
        if(contains(info.attributes, "seed"))
            seed = info.attributes.at("seed").f();

        shape out_shape;
        if(contains(info.attributes, "shape"))
        {
            // RandomNormal:
            // output type and shape must come from attributes
            std::vector<int> out_lens;
            literal ls = parser.parse_value(info.attributes.at("shape"));
            ls.visit([&](auto s) { out_lens.assign(s.begin(), s.end()); });
            out_shape = shape{out_type, out_lens};
        }
        else if(args.size() == 1)
        {
            // RandomNormalLike:
            // output type and shape are the same as the input's by default;
            // dtype is used instead when the attribute is set
            if(not contains(valid_types, args[0]->get_shape().type()))
                MIGRAPHX_THROW(opd.op_name + ": invalid output type: " +
                               std::to_string(args[0]->get_shape().type()) +
                               ". Valid types are float, half, and double.");
            out_shape =
                use_dtype ? shape{out_type, args[0]->get_shape().lens()} : args[0]->get_shape();
        }
        else
        {
            MIGRAPHX_THROW(opd.op_name +
                           ": cannot deduce shape without shape attribute or argument.");
        }

        std::mt19937 gen(seed);
        std::normal_distribution<> d(mean, scale);
        std::vector<double> rand_vals(out_shape.elements());
        std::generate(rand_vals.begin(), rand_vals.end(), [&]() { return d(gen); });

        return info.add_literal(literal{out_shape, rand_vals});
    }
};

} // namespace onnx
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#include <migraphx/onnx/op_parser.hpp>
#include <migraphx/ranges.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/make_op.hpp>
#include <migraphx/onnx/checks.hpp>
#include <algorithm> // for std::generate
#include <chrono>    // for the default time-based seed
#include <random>
#include <set>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace onnx {

struct parse_randomuniform_ops : op_parser<parse_randomuniform_ops>
{
    const std::set<shape::type_t> valid_types = {
        shape::float_type, shape::half_type, shape::double_type};

    std::vector<op_desc> operators() const { return {{"RandomUniform"}, {"RandomUniformLike"}}; }

    instruction_ref parse(const op_desc& opd,
                          const onnx_parser& parser,
                          const onnx_parser::node_info& info,
                          std::vector<instruction_ref> args) const
    {
        int dtype      = 1; // ONNX default: float
        bool use_dtype = false;
        if(contains(info.attributes, "dtype"))
        {
            dtype     = info.attributes.at("dtype").i();
            use_dtype = true;
        }
        shape::type_t out_type = get_type(dtype);
        if(not contains(valid_types, out_type))
            MIGRAPHX_THROW(opd.op_name + ": invalid output type: " + std::to_string(dtype) +
                           ". Valid types are 1 (float), 10 (half), and 11 (double).");

        float high = 1.0;
        if(contains(info.attributes, "high"))
            high = info.attributes.at("high").f();

        float low = 0.0;
        if(contains(info.attributes, "low"))
            low = info.attributes.at("low").f();

        float seed = static_cast<float>(
            std::chrono::high_resolution_clock::now().time_since_epoch().count());
        if(contains(info.attributes, "seed"))
            seed = info.attributes.at("seed").f();

        shape out_shape;
        if(contains(info.attributes, "shape"))
        {
            // RandomUniform:
            // output type and shape must come from attributes
            std::vector<int> out_lens;
            literal ls = parser.parse_value(info.attributes.at("shape"));
            ls.visit([&](auto s) { out_lens.assign(s.begin(), s.end()); });
            out_shape = shape{out_type, out_lens};
        }
        else if(args.size() == 1)
        {
            // RandomUniformLike:
            // output type and shape are the same as the input's by default;
            // dtype is used instead when the attribute is set
            if(not contains(valid_types, args[0]->get_shape().type()))
                MIGRAPHX_THROW(opd.op_name + ": invalid output type: " +
                               std::to_string(args[0]->get_shape().type()) +
                               ". Valid types are float, half, and double.");
            out_shape =
                use_dtype ? shape{out_type, args[0]->get_shape().lens()} : args[0]->get_shape();
        }
        else
        {
            MIGRAPHX_THROW(opd.op_name +
                           ": cannot deduce shape without shape attribute or argument.");
        }

        std::mt19937 gen(seed);
        std::uniform_real_distribution<> d(low, high); // (min, max) order
        std::vector<double> rand_vals(out_shape.elements());
        std::generate(rand_vals.begin(), rand_vals.end(), [&]() { return d(gen); });

        return info.add_literal(literal{out_shape, rand_vals});
    }
};

} // namespace onnx
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
@@ -59,6 +59,7 @@ add_library(migraphx_device
     device/mul.cpp
     device/mul_add.cpp
     device/mul_add_relu.cpp
+    device/multinomial.cpp
     device/pad.cpp
     device/pow.cpp
     device/prelu.cpp
@@ -143,6 +144,7 @@ add_library(migraphx_gpu
     lrn.cpp
     leaky_relu.cpp
     mlir_conv.cpp
+    multinomial.cpp
     pack_args.cpp
     pack_int8_args.cpp
     pad.cpp
@@ -199,6 +201,7 @@ register_migraphx_gpu_ops(hip_
     max
     min
     mul
+    multinomial
     pad
     pow
     prelu
...
@@ -12,10 +12,6 @@ inline namespace MIGRAPHX_INLINE_NS {
 namespace gpu {
 namespace device {
 
-#if __AMDGCN_WAVEFRONT_SIZE == 32
-#define MIGRAPHX_NO_DPP
-#endif
-
 #ifdef MIGRAPHX_NO_DPP
 template <index_int N,
           class Op,
@@ -98,10 +94,12 @@ __device__ void dpp_reduce(T& in, Op op)
     in  = op(in, out);
     out = dpp_mov<dpp_row_shr(8), 0xf, 0xc>(in);
     in  = op(in, out);
+#if __AMDGCN_WAVEFRONT_SIZE == 64
     out = dpp_mov<dpp_row_bcast(15), 0xa>(in);
     in  = op(in, out);
     out = dpp_mov<dpp_row_bcast(31), 0xc>(in);
     in  = op(in, out);
+#endif
 }
 
 __device__ inline void dpp_reduce(float& x, sum)
@@ -118,9 +116,11 @@ __device__ inline void dpp_reduce(float& x, sum)
                      "s_nop 1\n"
                      "v_add_f32 %0 %0 %0 row_shr:8 bank_mask:0xc\n"
                      "s_nop 1\n"
+#if __AMDGCN_WAVEFRONT_SIZE == 64
                      "v_add_f32 %0 %0 %0 row_bcast:15 row_mask:0xa\n"
                      "s_nop 1\n"
                      "v_add_f32 %0 %0 %0 row_bcast:31 row_mask:0xc\n"
+#endif
                      "s_nop 1\n"
                      : "=v"(x)
                      : "0"(x));
@@ -135,21 +135,27 @@ template <index_int N,
           MIGRAPHX_REQUIRES(not std::is_integral<ForStride>{})>
 __device__ auto block_reduce(index idx, Op op, T init, ForStride fs, F f)
 {
-    using type = decltype(f(deduce_for_stride(fs)));
-    MIGRAPHX_DEVICE_SHARED type buffer[N / 64];
+#if __AMDGCN_WAVEFRONT_SIZE == 32
+    constexpr index_int nthreads = 16;
+#else
+    constexpr index_int nthreads = 64;
+#endif
+    using type = decltype(f(deduce_for_stride(fs)));
+    MIGRAPHX_DEVICE_SHARED type buffer[N / nthreads];
     type x = init;
    fs([&](auto i) { x = op(x, f(i)); });
     dpp_reduce(x, op);
 
-    const auto ldsidx = idx.local / 64;
-    if((idx.local % 64) == 63)
+    const auto ldsidx = idx.local / nthreads;
+    if((idx.local % nthreads) == nthreads - 1)
     {
         buffer[ldsidx] = x;
     }
     __syncthreads();
 
     type y = init;
-    for(index_int i = 0; i < idx.nlocal() / 64; i++)
+    for(index_int i = 0; i < idx.nlocal() / nthreads; i++)
     {
         y = op(y, buffer[i]);
     }
...
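The block_reduce change follows from the dpp_reduce change above: with the row_bcast steps compiled out on wave32 targets, dpp_reduce only combines lanes within a 16-lane row, so one partial per 16 threads has to be staged through LDS instead of one per 64-thread wavefront. A host-side model of that banked two-stage reduction (an assumed simplification, not the device code):

#include <cstddef>
#include <numeric>
#include <vector>

// Two-stage sum: each group of g lanes (16 on wave32, 64 on wave64) forms one
// partial ("dpp_reduce"), the last lane of each group banks it (the LDS
// "buffer"), and the partials are then folded together. Assumes x.size() is a
// multiple of g.
float block_reduce_model(const std::vector<float>& x, std::size_t g)
{
    std::vector<float> partials;
    for(std::size_t base = 0; base < x.size(); base += g)
        partials.push_back(
            std::accumulate(x.begin() + base, x.begin() + base + g, 0.0f));
    return std::accumulate(partials.begin(), partials.end(), 0.0f);
}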
@@ -8,6 +8,14 @@ inline namespace MIGRAPHX_INLINE_NS {
 namespace gpu {
 namespace device {
 
+#ifndef MIGRAPHX_WORKAROUND_NAVI_DPP_SYNC
+#if __AMDGCN_WAVEFRONT_SIZE == 32
+#define MIGRAPHX_WORKAROUND_NAVI_DPP_SYNC 1
+#else
+#define MIGRAPHX_WORKAROUND_NAVI_DPP_SYNC 0
+#endif
+#endif
+
 template <class T>
 struct vector_type
 {
@@ -86,10 +94,13 @@ __device__ void layernorm(index_int i,
     const bool in_range = idx.local < relements_v;
 
     auto mean = [&](auto z) {
-        return auto_block_reduce<MaxBlockSize>(
-                   idx, sum{}, value_type(0), relements_v, [=](auto) { return z; }) /
-               value_type(relements);
+        auto m = auto_block_reduce<MaxBlockSize>(
+                     idx, sum{}, value_type(0), relements_v, [=](auto) { return z; }) /
+                 value_type(relements);
+#if MIGRAPHX_WORKAROUND_NAVI_DPP_SYNC
+        __builtin_amdgcn_s_barrier();
+#endif
+        return m;
     };
 
     // m = x - mean(x)
...
#include <migraphx/shape.hpp>
#include <migraphx/argument.hpp>
#include <migraphx/dfor.hpp>
#include <migraphx/gpu/device/multinomial.hpp>
#include <migraphx/gpu/device/tensor.hpp>
#include <migraphx/gpu/device/launch.hpp>
#include <migraphx/gpu/device/types.hpp>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {

// Device-usable equivalent of std::upper_bound: binary search for the first
// element in the sorted range [first, last) that compares greater than value.
template <class Iterator, class T>
constexpr Iterator upper_bound(Iterator first, Iterator last, const T& value)
{
    Iterator it;
    typename std::iterator_traits<Iterator>::difference_type count;
    typename std::iterator_traits<Iterator>::difference_type step;
    count = std::distance(first, last);
    while(count > 0)
    {
        it   = first;
        step = count / 2;
        std::advance(it, step);
        if(!(value < *it))
        {
            first = ++it;
            count -= step + 1;
        }
        else
            count = step;
    }
    return first;
}

void multinomial(hipStream_t stream,
                 const argument& result,
                 const argument& arg0,
                 const argument& arg1)
{
    size_t batch_size  = arg0.get_shape().lens().front();
    size_t class_size  = arg0.get_shape().lens().back();
    size_t sample_size = result.get_shape().lens().back();

    hip_visit_all(arg0, arg1)([&](auto cdf, auto dist) {
        result.visit([&](auto out) {
            hip_visit_views(out)([&](auto output) {
                gs_launch(stream, batch_size * sample_size)([=](auto i) __device__ {
                    auto idx       = output.get_shape().multi(i);
                    auto cdf_begin = cdf.begin() + (idx.front() * class_size);
                    auto cdf_end   = cdf_begin + class_size;
                    // Same sampling rule as the reference op: scale the uniform
                    // draw by the row total, then binary-search the CDF.
                    auto sample_iter =
                        upper_bound(cdf_begin, cdf_end, dist[i] * *(std::prev(cdf_end)));
                    output[i] = std::distance(cdf_begin, sample_iter);
                });
            });
        });
    });
}

} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#ifndef MIGRAPHX_GUARD_RTGLIB_DEVICE_MULTINOMIAL_HPP
#define MIGRAPHX_GUARD_RTGLIB_DEVICE_MULTINOMIAL_HPP
#include <migraphx/argument.hpp>
#include <migraphx/config.hpp>
#include <hip/hip_runtime_api.h>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
void multinomial(hipStream_t stream,
const argument& result,
const argument& arg0,
const argument& arg1);
} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
#ifndef MIGRAPHX_GUARD_RTGLIB_MULTINOMIAL_HPP
#define MIGRAPHX_GUARD_RTGLIB_MULTINOMIAL_HPP

#include <migraphx/op/multinomial.hpp>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {

struct context;

struct hip_multinomial
{
    op::multinomial op;

    template <class Self, class F>
    static auto reflect(Self& self, F f)
    {
        return migraphx::reflect(self.op, f);
    }

    std::string name() const { return "gpu::multinomial"; }
    shape compute_shape(std::vector<shape> inputs) const;
    argument
    compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const;

    // The preallocated output buffer is appended as the last argument
    std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
    {
        return shapes.size() - 1;
    }
};

} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
@@ -164,6 +164,7 @@ struct miopen_apply
         add_extend_op("leaky_relu");
         add_extend_op("logsoftmax");
         add_extend_op("lrn");
+        add_extend_op("multinomial");
         add_extend_op("pad");
         add_extend_op("pooling");
         add_extend_op("prefix_scan_sum");
...
#include <migraphx/gpu/multinomial.hpp>
#include <migraphx/gpu/device/multinomial.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/tune_axis.hpp>
#include <migraphx/check_shapes.hpp>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {

shape hip_multinomial::compute_shape(std::vector<shape> inputs) const
{
    check_shapes{inputs, *this}.has(3).only_dims(2).standard();
    // Drop the appended output allocation before delegating to the reference op
    inputs.pop_back();
    return op.compute_shape(inputs);
}

argument
hip_multinomial::compute(context& ctx, const shape&, const std::vector<argument>& args) const
{
    device::multinomial(ctx.get_stream().get(), args.back(), args.front(), args[1]);
    return args.back();
}

} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
[Binary ONNX test files added: depthtospace_crd_test (DepthToSpace, mode "CRD"), depthtospace_simple_test (DepthToSpace, mode "DCR"), and depthtospace_test (DepthToSpace, mode "DCR"); protobuf content not rendered.]