Commit 4e3ca586 authored by Khalique

Merge branch 'develop' of https://github.com/ROCmSoftwarePlatform/AMDMIGraphX into broadcast_attr

parents 1775c5ad 31b2c735
......@@ -74,7 +74,7 @@
</message>
</rule>
<rule>
<pattern>(fclose|free|hipFree|hipHostFree|hipFreeArray|hipMemFree|hipStreamDestroy|hipEventDestroy|hipArrayDestroy|hipCtxDestroy|hipDestroyTextureObject|hipDestroySurfaceObject) \(</pattern>
<pattern>\\W(fclose|free|hipFree|hipHostFree|hipFreeArray|hipMemFree|hipStreamDestroy|hipEventDestroy|hipArrayDestroy|hipCtxDestroy|hipDestroyTextureObject|hipDestroySurfaceObject) \(</pattern>
<message>
<id>useManagePointer</id>
<severity>style</severity>
......
......@@ -13,8 +13,6 @@ inline namespace MIGRAPHX_INLINE_NS {
void eliminate_allocation::apply(program& p) const
{
assert(alignment > 0);
if(!enabled(MIGRAPHX_DISABLE_MEMORY_COLORING{}))
return;
std::size_t n = 0;
std::vector<std::pair<instruction_ref, std::size_t>> allocs;
......@@ -27,13 +25,16 @@ void eliminate_allocation::apply(program& p) const
std::size_t padding = (alignment - (size % alignment)) % alignment;
n += size + padding;
}
auto mem = p.add_parameter("memory", shape{shape::int8_type, {n}});
for(auto&& pp : allocs)
if(n > 0)
{
auto ins = pp.first;
auto s = ins->get_shape();
auto offset = pp.second;
p.replace_instruction(ins, op::load{s, offset}, mem);
auto mem = p.add_parameter("memory", shape{shape::int8_type, {n}});
for(auto&& pp : allocs)
{
auto ins = pp.first;
auto s = ins->get_shape();
auto offset = pp.second;
p.replace_instruction(ins, op::load{s, offset}, mem);
}
}
}
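The guard added above skips creating the "memory" parameter when there are no allocations (n == 0). The padding formula rounds each allocation up to the next multiple of alignment; a minimal standalone sketch of the packing arithmetic (pack_offsets is a hypothetical name, not part of migraphx):

#include <cstddef>
#include <vector>

// Place each buffer at the running offset, then round the offset up to
// the next multiple of `alignment` so every buffer starts aligned.
std::vector<std::size_t> pack_offsets(const std::vector<std::size_t>& sizes,
                                      std::size_t alignment)
{
    std::vector<std::size_t> offsets;
    std::size_t n = 0;
    for(auto size : sizes)
    {
        offsets.push_back(n);
        std::size_t padding = (alignment - (size % alignment)) % alignment;
        n += size + padding;
    }
    return offsets; // n is now the total pool size to allocate
}

For sizes {10, 24} with alignment 16 this yields offsets {0, 16} and a 48-byte pool.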
......
......@@ -36,14 +36,17 @@ void eliminate_concat::apply(program& p) const
// Where are the allocations for the tensors to be concatenated?
std::vector<instruction_ref> allocations;
for(auto ins2 = ins->inputs().begin(); ins2 != ins->inputs().end() - 1; ins2++)
{
auto last2 = (*ins2)->inputs().back();
if(last2->name() == concat_opt.allocate())
{
allocations.push_back(last2);
}
}
std::transform(
ins->inputs().begin(),
std::prev(ins->inputs().end()),
std::back_inserter(allocations),
[&](instruction_ref x) { return instruction::get_output_alias(x, true); });
if(std::any_of(allocations.begin(), allocations.end(), [&](auto x) {
return x->name() != concat_opt.allocate();
}))
continue;
// Need to sort the allocations, so that we know where to
// insert the "super"-allocation
std::sort(
......@@ -51,15 +54,15 @@ void eliminate_concat::apply(program& p) const
return std::distance(p.begin(), x) < std::distance(p.begin(), y);
});
// Move "super" allocation to the front
auto first = allocations.front();
auto super = p.move_instruction(last, first);
auto first = allocations.front();
auto super = p.move_instruction(last, first);
// Replace each allocation with a load
std::size_t offset = 0;
for(auto x : allocations)
for(auto alloc : allocations)
{
migraphx::op::load op{x->get_shape(), offset};
// migraphx::op::load op{x->get_shape(), 0};
p.replace_instruction(x, op, {super});
offset += x->get_shape().bytes();
op::load op{alloc->get_shape(), offset};
p.replace_instruction(alloc, op, {super});
offset += alloc->get_shape().bytes();
}
std::vector<instruction_ref> args = {super};
std::copy(ins->inputs().begin(), ins->inputs().end() - 1, std::back_inserter(args));
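The rewrite above takes the output alias of each concat input, bails out unless every alias is an allocation, and then folds those allocations into one "super" allocation addressed by byte offsets. A worked example, assuming two float {2,3} inputs concatenated on axis 0:

// super = allocate 48 bytes                   (2 * 2*3 * sizeof(float))
// alloc0 -> op::load{shape{float,{2,3}}, 0}   (super)
// alloc1 -> op::load{shape{float,{2,3}}, 24}  (super)
// Each producer now writes directly into its slice of `super`, so the
// concat only has to alias the super allocation instead of copying.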
......
......@@ -15,11 +15,19 @@ struct check_context
std::string name() const { return "check_context"; }
shape compute_shape(const std::vector<shape>&) const { return {}; }
argument compute(context& ctx, const shape&, const std::vector<argument>&) const
{
this->check(ctx);
return {};
}
void finalize(context& ctx, const shape&, const std::vector<shape>&) const
{
this->check(ctx);
}
void check(context& ctx) const
{
T* x = any_cast<T>(&ctx);
if(x == nullptr)
MIGRAPHX_THROW(std::string("Unexpected context type: ") + ctx.type_id().name());
return {};
}
};
......
......@@ -119,6 +119,13 @@ struct concat_optimization
return (*this).private_detail_te_get_handle().get_concat(op);
}
friend bool is_shared(const concat_optimization& private_detail_x,
const concat_optimization& private_detail_y)
{
return private_detail_x.private_detail_te_handle_mem_var ==
private_detail_y.private_detail_te_handle_mem_var;
}
private:
struct private_detail_te_handle_base_type
{
......
......@@ -95,7 +95,13 @@ struct context
void finish() const
{
assert((*this).private_detail_te_handle_mem_var);
return (*this).private_detail_te_get_handle().finish();
(*this).private_detail_te_get_handle().finish();
}
friend bool is_shared(const context& private_detail_x, const context& private_detail_y)
{
return private_detail_x.private_detail_te_handle_mem_var ==
private_detail_y.private_detail_te_handle_mem_var;
}
private:
......@@ -136,7 +142,7 @@ struct context
const std::type_info& type() const override { return typeid(private_detail_te_value); }
void finish() const override { return private_detail_te_value.finish(); }
void finish() const override { private_detail_te_value.finish(); }
PrivateDetailTypeErasedT private_detail_te_value;
};
......
......@@ -94,6 +94,12 @@ constexpr void each_args(F)
{
}
template <class F, class T>
auto unpack(F f, T& x)
{
return sequence_c<std::tuple_size<T>{}>([&](auto... is) { f(std::get<is>(x)...); });
}
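unpack expands a tuple into a plain argument list by applying f to every element at once. A minimal equivalent written with std::index_sequence instead of migraphx's sequence_c (a sketch, not the library code):

#include <cstddef>
#include <tuple>
#include <utility>

template <class F, class T, std::size_t... Is>
auto unpack_impl(F f, T& x, std::index_sequence<Is...>)
{
    return f(std::get<Is>(x)...);
}

template <class F, class T>
auto unpack(F f, T& x)
{
    return unpack_impl(f, x, std::make_index_sequence<std::tuple_size<T>{}>{});
}

// std::tuple<int, int> t{2, 3};
// unpack([](int a, int b) { return a + b; }, t); // calls f(2, 3) -> 5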
/// Implements a fix-point combinator
template <class R, class F>
detail::fix_f<R, F> fix(F f)
......
......@@ -14,6 +14,7 @@ namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
shape compute_shape(const operation& op, const std::vector<instruction_ref>& args);
std::vector<shape> to_shapes(const std::vector<instruction_ref>& args);
struct instruction
{
......@@ -71,7 +72,11 @@ struct instruction
static void
replace(instruction_ref ins, operation o, const shape& r, std::vector<instruction_ref> args);
static instruction_ref get_output_alias(instruction_ref ins);
argument eval() const;
void finalize(context& ctx);
static instruction_ref get_output_alias(instruction_ref ins, bool shallow = false);
private:
// internal
......
......@@ -26,6 +26,8 @@ struct operation
{
/// A unique name identifying the operation
std::string name() const;
/// An optional method that can be used to finalize the operator before running
void finalize(context& ctx);
/// This is used to compute the resulting shape from an operation. If an
/// operation cannot be run with input shapes, then it should throw an
/// exception.
......@@ -53,6 +55,11 @@ struct operation
friend std::ostream& operator<<(std::ostream& os, const operation& op);
};
/// Returns true if operation does not require a context to run compute
bool is_context_free(const operation& x);
/// Returns true if the operation has a finalize method
bool has_finalize(const operation& x);
#else
namespace operation_stream {
......@@ -89,7 +96,7 @@ auto operator==(const T& x, const U& y) -> decltype(x.name() == y.name())
} // namespace operation_equal
template <class T>
auto compute_op(rank<1>,
auto compute_op(rank<2>,
const T& x,
context& ctx,
const shape& output_shape,
......@@ -99,6 +106,14 @@ auto compute_op(rank<1>,
return x.compute(auto_any_cast(ctx), output_shape, input);
}
template <class T>
auto compute_op(
rank<1>, const T& x, context&, const shape& output_shape, const std::vector<argument>& input)
-> decltype(x.compute(output_shape, input))
{
return x.compute(output_shape, input);
}
template <class T>
argument compute_op(rank<0>, const T& x, context&, const shape&, const std::vector<argument>&)
{
......@@ -110,7 +125,53 @@ template <class T>
argument
compute_op(const T& x, context& ctx, const shape& output_shape, const std::vector<argument>& input)
{
return compute_op(rank<1>{}, x, ctx, output_shape, input);
return compute_op(rank<2>{}, x, ctx, output_shape, input);
}
template <class T>
auto compute_op(rank<2>, const T& x, const shape& output_shape, const std::vector<argument>& input)
-> decltype(x.compute(output_shape, input))
{
return x.compute(output_shape, input);
}
template <class T>
auto compute_op(rank<1>, const T& x, const shape& output_shape, const std::vector<argument>& input)
-> decltype(x.compute(auto_any_cast(std::declval<context&>()), output_shape, input))
{
std::string name = x.name();
MIGRAPHX_THROW("Not computable without a context: " + name);
}
template <class T>
argument compute_op(rank<0>, const T& x, const shape&, const std::vector<argument>&)
{
std::string name = x.name();
MIGRAPHX_THROW("Not computable: " + name);
}
template <class T>
argument compute_op(const T& x, const shape& output_shape, const std::vector<argument>& input)
{
return compute_op(rank<2>{}, x, output_shape, input);
}
template <class T>
auto is_context_free_op(rank<1>,
const T& x,
const shape& output_shape,
const std::vector<argument>& input)
-> decltype(x.compute(output_shape, input), std::true_type{});
template <class T>
auto is_context_free_op(rank<0>, const T&, const shape&, const std::vector<argument>&)
-> std::false_type;
template <class T>
auto is_context_free_op(const T& x) -> decltype(is_context_free_op(
rank<1>{}, x, std::declval<const shape&>(), std::declval<std::vector<argument>>()))
{
return {};
}
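The rank<2>/rank<1>/rank<0> overloads are tag dispatching: rank<N> derives from rank<N-1>, so overload resolution prefers the highest-ranked candidate whose trailing decltype survives SFINAE and falls through to the next rank otherwise. A self-contained sketch of the idiom, assuming a rank template like migraphx's:

#include <iostream>

template <int N>
struct rank : rank<N - 1> {};
template <>
struct rank<0> {};

// Preferred overload: viable only when T has t.compute(); otherwise the
// trailing decltype removes it from the overload set (SFINAE).
template <class T>
auto dispatch(rank<1>, T& t) -> decltype(t.compute())
{
    return t.compute();
}

// Fallback chosen when the rank<1> overload drops out.
template <class T>
int dispatch(rank<0>, T&)
{
    return -1;
}

template <class T>
int dispatch(T& t)
{
    return dispatch(rank<1>{}, t); // start at the highest rank
}

struct with_compute { int compute() const { return 42; } };
struct without {};

int main()
{
    with_compute a;
    without b;
    std::cout << dispatch(a) << " " << dispatch(b) << "\n"; // prints: 42 -1
}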
template <class T>
......@@ -132,15 +193,57 @@ int output_alias_op(const T& x, const std::vector<shape>& shapes)
return output_alias_op(rank<1>{}, x, shapes);
}
template <class T>
auto finalize_op(
rank<1>, T& x, context& ctx, const shape& output_shape, const std::vector<shape>& input)
-> decltype(x.finalize(auto_any_cast(ctx), output_shape, input), void())
{
x.finalize(auto_any_cast(ctx), output_shape, input);
}
template <class T>
void finalize_op(rank<0>, T&, context&, const shape&, const std::vector<shape>&)
{
}
template <class T>
void finalize_op(T& x, context& ctx, const shape& output_shape, const std::vector<shape>& input)
{
finalize_op(rank<1>{}, x, ctx, output_shape, input);
}
template <class T>
auto has_finalize_op(
rank<1>, T& x, context& ctx, const shape& output_shape, const std::vector<shape>& input)
-> decltype(x.finalize(auto_any_cast(ctx), output_shape, input), std::true_type{});
template <class T>
auto has_finalize_op(rank<0>, T&, context&, const shape&, const std::vector<shape>&)
-> std::false_type;
template <class T>
auto has_finalize_op(const T&) -> decltype(has_finalize_op(rank<1>{},
std::declval<T&>(),
std::declval<context&>(),
std::declval<const shape&>(),
std::declval<std::vector<shape>>()))
{
return {};
}
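has_finalize_op and is_context_free_op are built the same way, but as compile-time traits: the rank<1> overload is only declared, never defined, because it is used purely inside decltype, and its std::true_type/std::false_type return becomes the trait's value. A sketch of the pattern (has_foo is a hypothetical name):

#include <type_traits>
#include <utility>

template <int N> struct rank : rank<N - 1> {};
template <> struct rank<0> {};

// Declared only; evaluated inside decltype, so no definition is needed.
template <class T>
auto has_foo(rank<1>, T& t) -> decltype(t.foo(), std::true_type{});
template <class T>
auto has_foo(rank<0>, T&) -> std::false_type;

template <class T>
constexpr bool has_foo_v = decltype(has_foo(rank<1>{}, std::declval<T&>()))::value;

struct a { void foo(); };
struct b {};
static_assert(has_foo_v<a>, "a has foo");
static_assert(!has_foo_v<b>, "b does not");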
/*
* Type-erased interface for:
*
* struct operation
* {
* std::string name() const;
* bool is_context_free() const;
* bool has_finalize() const;
* int output_alias(const std::vector<shape>& input) const;
* void finalize(context& ctx,const shape& output,const std::vector<shape>& input) ;
* shape compute_shape(const std::vector<shape>& input) const;
* argument compute(context& ctx,const shape& output,const std::vector<argument>& input) const;
* argument compute(const shape& output,const std::vector<argument>& input) const;
* friend std::ostream & operator<<(std::ostream & os,const operation & op) ;
* friend bool operator==(const operation & x,const operation & y) ;
* };
......@@ -210,12 +313,30 @@ struct operation
return (*this).private_detail_te_get_handle().name();
}
bool is_context_free() const
{
assert((*this).private_detail_te_handle_mem_var);
return (*this).private_detail_te_get_handle().is_context_free();
}
bool has_finalize() const
{
assert((*this).private_detail_te_handle_mem_var);
return (*this).private_detail_te_get_handle().has_finalize();
}
int output_alias(const std::vector<shape>& input) const
{
assert((*this).private_detail_te_handle_mem_var);
return (*this).private_detail_te_get_handle().output_alias(input);
}
void finalize(context& ctx, const shape& output, const std::vector<shape>& input)
{
assert((*this).private_detail_te_handle_mem_var);
(*this).private_detail_te_get_handle().finalize(ctx, output, input);
}
shape compute_shape(const std::vector<shape>& input) const
{
assert((*this).private_detail_te_handle_mem_var);
......@@ -228,6 +349,12 @@ struct operation
return (*this).private_detail_te_get_handle().compute(ctx, output, input);
}
argument compute(const shape& output, const std::vector<argument>& input) const
{
assert((*this).private_detail_te_handle_mem_var);
return (*this).private_detail_te_get_handle().compute(output, input);
}
friend std::ostream& operator<<(std::ostream& os, const operation& op)
{
assert(op.private_detail_te_handle_mem_var);
......@@ -240,6 +367,12 @@ struct operation
return x.private_detail_te_get_handle().operator==(y);
}
friend bool is_shared(const operation& private_detail_x, const operation& private_detail_y)
{
return private_detail_x.private_detail_te_handle_mem_var ==
private_detail_y.private_detail_te_handle_mem_var;
}
private:
struct private_detail_te_handle_base_type
{
......@@ -247,13 +380,18 @@ struct operation
virtual std::shared_ptr<private_detail_te_handle_base_type> clone() const = 0;
virtual const std::type_info& type() const = 0;
virtual std::string name() const = 0;
virtual int output_alias(const std::vector<shape>& input) const = 0;
virtual shape compute_shape(const std::vector<shape>& input) const = 0;
virtual std::string name() const = 0;
virtual bool is_context_free() const = 0;
virtual bool has_finalize() const = 0;
virtual int output_alias(const std::vector<shape>& input) const = 0;
virtual void
finalize(context& ctx, const shape& output, const std::vector<shape>& input) = 0;
virtual shape compute_shape(const std::vector<shape>& input) const = 0;
virtual argument
compute(context& ctx, const shape& output, const std::vector<argument>& input) const = 0;
virtual std::ostream& operator_shift_left(std::ostream& os) const = 0;
virtual bool operator==(const operation& y) const = 0;
compute(context& ctx, const shape& output, const std::vector<argument>& input) const = 0;
virtual argument compute(const shape& output, const std::vector<argument>& input) const = 0;
virtual std::ostream& operator_shift_left(std::ostream& os) const = 0;
virtual bool operator==(const operation& y) const = 0;
};
template <typename PrivateDetailTypeErasedT>
......@@ -286,12 +424,26 @@ struct operation
std::string name() const override { return private_detail_te_value.name(); }
bool is_context_free() const override
{
return is_context_free_op(private_detail_te_value);
}
bool has_finalize() const override { return has_finalize_op(private_detail_te_value); }
int output_alias(const std::vector<shape>& input) const override
{
return output_alias_op(private_detail_te_value, input);
}
void finalize(context& ctx, const shape& output, const std::vector<shape>& input) override
{
finalize_op(private_detail_te_value, ctx, output, input);
}
shape compute_shape(const std::vector<shape>& input) const override
{
......@@ -306,6 +458,12 @@ struct operation
return compute_op(private_detail_te_value, ctx, output, input);
}
argument compute(const shape& output, const std::vector<argument>& input) const override
{
return compute_op(private_detail_te_value, output, input);
}
std::ostream& operator_shift_left(std::ostream& os) const override
{
using migraphx::operation_stream::operator<<;
......@@ -385,6 +543,22 @@ inline const ValueType& any_cast(const operation& x)
inline bool operator!=(const operation& x, const operation& y) { return !(x == y); }
inline bool is_context_free(const operation& op) { return op.is_context_free(); }
template <class T>
bool is_context_free(const T& x)
{
return is_context_free_op(x);
}
inline bool has_finalize(const operation& op) { return op.has_finalize(); }
template <class T>
bool has_finalize(const T& x)
{
return has_finalize_op(x);
}
#endif
} // namespace MIGRAPHX_INLINE_NS
......
......@@ -6,6 +6,8 @@
#include <migraphx/check_shapes.hpp>
#include <migraphx/stringutils.hpp>
#include <migraphx/streamutils.hpp>
#include <migraphx/literal.hpp>
#include <migraphx/shape_for_each.hpp>
#include <migraphx/config.hpp>
#include <cmath>
#include <utility>
......@@ -16,7 +18,7 @@ namespace op {
struct not_computable
{
argument compute(context&, const shape&, const std::vector<argument>&) const
argument compute(const shape&, const std::vector<argument>&) const
{
MIGRAPHX_THROW("not computable");
}
......@@ -63,6 +65,7 @@ struct convolution
valid
};
padding_mode_t padding_mode = default_;
int group = 1;
template <class Self, class F>
static auto reflect(Self& self, F f)
......@@ -70,7 +73,8 @@ struct convolution
return pack(f(self.padding, "padding"),
f(self.stride, "stride"),
f(self.dilation, "dilation"),
f(self.padding_mode, "padding_mode"));
f(self.padding_mode, "padding_mode"),
f(self.group, "group"));
}
std::string name() const { return "convolution"; }
......@@ -296,7 +300,7 @@ struct transpose
}
return {t, output_lens, output_strides};
}
argument compute(context&, shape output_shape, std::vector<argument> args) const
argument compute(shape output_shape, std::vector<argument> args) const
{
return {std::move(output_shape), std::move(args.front().data)};
}
......@@ -370,6 +374,27 @@ struct concat
new_lens[axis] = new_dim_axis;
return {type, new_lens};
}
argument compute(const shape& output_shape, std::vector<argument> args) const
{
argument result{output_shape};
std::vector<std::size_t> coffsets = compute_offsets(output_shape, args);
for(std::size_t l = 0; l < args.size(); l++)
{
auto argl = args[l];
std::size_t nelements = argl.get_shape().elements();
visit_all(result, argl)([&](auto output, auto input) {
auto slice_shape =
shape{output_shape.type(), input.get_shape().lens(), output_shape.strides()};
auto slice = make_view(slice_shape, output.data() + coffsets[l]);
// cppcheck-suppress useStlAlgorithm
for(std::size_t i = 0; i < nelements; i++)
{
slice[i] = input[i];
}
});
}
return result;
}
int output_alias(const std::vector<shape>&) const { return 0; }
};
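The new host-side compute builds, for each input, a view over the output buffer that keeps the input's lens but the output's strides, so a plain element-by-element copy scatters values into the right concatenated slots. A worked example of the layout, assuming two {2,2} tensors concatenated on axis 1:

// output shape {2,4}, row-major strides {4,1}
// view for args[0]: lens {2,2}, strides {4,1}, offset 0 -> writes slots 0,1,4,5
// view for args[1]: lens {2,2}, strides {4,1}, offset 2 -> writes slots 2,3,6,7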
......@@ -437,7 +462,7 @@ struct slice
}
return shape{t, new_lens, old_strides};
}
argument compute(context&, shape output_shape, std::vector<argument> args) const
argument compute(shape output_shape, std::vector<argument> args) const
{
auto input = args[0];
auto offset = compute_offset(input.get_shape()) * output_shape.type_size();
......@@ -487,7 +512,7 @@ struct squeeze
}
return shape{type, new_lens};
}
argument compute(context&, shape output_shape, std::vector<argument> args) const
argument compute(shape output_shape, std::vector<argument> args) const
{
return {std::move(output_shape), std::move(args.front().data)};
}
......@@ -526,7 +551,7 @@ struct unsqueeze
}
return shape{type, new_lens};
}
argument compute(context&, shape output_shape, std::vector<argument> args) const
argument compute(shape output_shape, std::vector<argument> args) const
{
return {std::move(output_shape), std::move(args.front().data)};
}
......@@ -578,13 +603,91 @@ struct reshape
MIGRAPHX_THROW("Wrong number of elements for reshape");
return s;
}
argument compute(context&, shape output_shape, std::vector<argument> args) const
argument compute(shape output_shape, std::vector<argument> args) const
{
return {std::move(output_shape), std::move(args.front().data)};
}
int output_alias(const std::vector<shape>&) const { return 0; }
};
struct as_shape
{
shape s;
template <class Self, class F>
static auto reflect(Self& self, F f)
{
return pack(f(self.s, "shape"));
}
std::string name() const { return "as_shape"; }
shape compute_shape(const std::vector<shape>& inputs) const
{
check_shapes{inputs, *this}.has(1).standard();
assert(inputs.front().elements() == s.elements());
return s;
}
argument compute(shape output_shape, std::vector<argument> args) const
{
return {std::move(output_shape), std::move(args.front().data)};
}
int output_alias(const std::vector<shape>&) const { return 0; }
};
struct gather
{
std::size_t axis = 0;
std::string name() const { return "gather"; }
shape compute_shape(std::vector<shape> inputs) const
{
check_shapes{inputs, *this}.has(2);
auto lens = inputs[0].lens();
if(axis >= lens.size())
{
MIGRAPHX_THROW("Gather, axis is out of range.");
}
auto type = inputs[0].type();
lens[axis] = inputs[1].elements();
return {type, lens};
}
template <class T>
void compute_index(const T& out_idx,
const std::vector<std::size_t>& vec_indices,
const std::size_t max_dim,
T& in_idx) const
{
in_idx = out_idx;
std::size_t idx = vec_indices.at(out_idx[axis]);
if(idx >= max_dim)
{
MIGRAPHX_THROW("Gather: indices are out of range in input tensor");
}
in_idx[axis] = idx;
}
argument compute(const shape& output_shape, std::vector<argument> args) const
{
argument result{output_shape};
// max dimension in axis
std::size_t max_dim = args[0].get_shape().lens()[axis];
std::vector<std::size_t> vec_indices;
args[1].visit([&](auto indices) { vec_indices.assign(indices.begin(), indices.end()); });
visit_all(result, args[0])([&](auto output, auto input) {
std::vector<std::size_t> in_idx;
shape_for_each(output.get_shape(), [&](const auto& idx) {
this->compute_index(idx, vec_indices, max_dim, in_idx);
output(idx.begin(), idx.end()) = input(in_idx.begin(), in_idx.end());
});
});
return result;
}
int output_alias(const std::vector<shape>&) const { return 0; }
};
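gather replaces the axis dimension of the input with the number of indices and pulls slices by index: for a {3,4} input, axis 0, and indices {2,0}, the output is {2,4} with out[i][j] = in[indices[i]][j]. A standalone sketch of that indexing on flat storage (gather_axis0 is a hypothetical helper, not the migraphx API):

#include <cstddef>
#include <vector>

// out[i][j] = in[indices[i]][j] for a rows x cols input gathered on axis 0.
std::vector<float> gather_axis0(const std::vector<float>& in,
                                std::size_t cols,
                                const std::vector<std::size_t>& indices)
{
    std::vector<float> out(indices.size() * cols);
    for(std::size_t i = 0; i < indices.size(); i++)
        for(std::size_t j = 0; j < cols; j++)
            out[i * cols + j] = in[indices[i] * cols + j];
    return out;
}

// gather_axis0({0,1,2,3, 4,5,6,7, 8,9,10,11}, 4, {2,0})
//   -> {8,9,10,11, 0,1,2,3}   (shape {2,4} from a {3,4} input)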
struct dot
{
float alpha = 1.0;
......@@ -624,7 +727,7 @@ struct identity
{
std::string name() const { return "identity"; }
shape compute_shape(std::vector<shape> inputs) const { return inputs.at(0); }
argument compute(context&, shape output_shape, std::vector<argument> args) const
argument compute(shape output_shape, std::vector<argument> args) const
{
return {std::move(output_shape), std::move(args.at(0).data)};
}
......@@ -742,7 +845,7 @@ struct flatten
std::accumulate(lens.begin() + axis, lens.end(), std::size_t{1}, std::multiplies<>{});
return {inputs.at(0).type(), {x, y}};
}
argument compute(context&, shape output_shape, std::vector<argument> args) const
argument compute(shape output_shape, std::vector<argument> args) const
{
return {std::move(output_shape), std::move(args.front().data)};
}
......@@ -794,7 +897,7 @@ struct broadcast
return {t, broadcast_shape.lens(), std::move(bcast_strides)};
}
}
argument compute(context&, shape output_shape, std::vector<argument> args) const
argument compute(shape output_shape, std::vector<argument> args) const
{
return {std::move(output_shape), std::move(args.at(0).data)};
}
......@@ -836,7 +939,7 @@ struct multibroadcast
}
return {t, output_lens, bcast_strides};
}
argument compute(context&, shape output_shape, std::vector<argument> args) const
argument compute(shape output_shape, std::vector<argument> args) const
{
return {std::move(output_shape), std::move(args.at(0).data)};
}
......@@ -858,7 +961,7 @@ struct scalar
return {t, scalar_bcast.lens(), strides};
}
argument compute(context&, shape output_shape, std::vector<argument> args) const
argument compute(shape output_shape, std::vector<argument> args) const
{
return {std::move(output_shape), std::move(args.at(0).data)};
}
......@@ -923,7 +1026,7 @@ struct load
check_shapes{inputs}.has(1);
return s;
}
argument compute(context&, const shape&, const std::vector<argument>& args) const
argument compute(const shape&, const std::vector<argument>& args) const
{
return {s, args[0].data() + offset};
}
......@@ -946,10 +1049,7 @@ struct outline
check_shapes{inputs, *this}.has(0);
return s;
}
argument compute(context&, const shape&, const std::vector<argument>&) const
{
return {s, nullptr};
}
argument compute(const shape&, const std::vector<argument>&) const { return {s, nullptr}; }
};
} // namespace op
......
#ifndef MIGRAPHX_GUARD_RTGLIB_PAR_DFOR_HPP
#define MIGRAPHX_GUARD_RTGLIB_PAR_DFOR_HPP
#include <migraphx/par_for.hpp>
#include <migraphx/functional.hpp>
#include <array>
#include <numeric>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
template <class... Ts>
auto par_dfor(Ts... xs)
{
return [=](auto f) {
using array_type = std::array<std::size_t, sizeof...(Ts)>;
array_type lens = {{static_cast<std::size_t>(xs)...}};
auto n = std::accumulate(lens.begin(), lens.end(), 1, std::multiplies<std::size_t>{});
const std::size_t min_grain = 8;
if(n > 2 * min_grain)
{
array_type strides;
strides.fill(1);
std::partial_sum(lens.rbegin(),
lens.rend() - 1,
strides.rbegin() + 1,
std::multiplies<std::size_t>());
auto size =
std::accumulate(lens.begin(), lens.end(), 1, std::multiplies<std::size_t>());
par_for(size, min_grain, [&](std::size_t i) {
array_type indices;
std::transform(strides.begin(),
strides.end(),
lens.begin(),
indices.begin(),
[&](size_t stride, size_t len) { return (i / stride) % len; });
migraphx::unpack(f, indices);
});
}
else
{
dfor(xs...)(f);
}
};
}
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
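par_dfor flattens the nested loops into a single parallel loop of size len0*len1*..., then recovers each multi-dimensional index with index_k = (i / stride_k) % len_k using row-major strides. A small check of that arithmetic:

#include <array>
#include <cassert>
#include <cstddef>

int main()
{
    const std::array<std::size_t, 3> lens{{2, 3, 4}};
    const std::array<std::size_t, 3> strides{{12, 4, 1}}; // row-major partial products
    const std::size_t i = 17;
    std::array<std::size_t, 3> idx{};
    for(std::size_t k = 0; k < 3; k++)
        idx[k] = (i / strides[k]) % lens[k];
    assert((idx == std::array<std::size_t, 3>{{1, 1, 1}})); // 1*12 + 1*4 + 1*1 == 17
}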
#ifndef MIGRAPHX_GUARD_RTGLIB_PAR_FOR_HPP
#define MIGRAPHX_GUARD_RTGLIB_PAR_FOR_HPP
#include <thread>
#include <cmath>
#include <algorithm>
#include <vector>
#include <cassert>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
struct joinable_thread : std::thread
{
template <class... Xs>
joinable_thread(Xs&&... xs) : std::thread(std::forward<Xs>(xs)...) // NOLINT
{
}
joinable_thread& operator=(joinable_thread&& other) = default;
joinable_thread(joinable_thread&& other) = default;
~joinable_thread()
{
if(this->joinable())
this->join();
}
};
template <class F>
void par_for_impl(std::size_t n, std::size_t threadsize, F f)
{
if(threadsize <= 1)
{
for(std::size_t i = 0; i < n; i++)
f(i);
}
else
{
std::vector<joinable_thread> threads(threadsize);
// Using const here causes gcc 5 to ICE
#if(!defined(__GNUC__) || __GNUC__ != 5)
const
#endif
std::size_t grainsize = std::ceil(static_cast<double>(n) / threads.size());
std::size_t work = 0;
std::generate(threads.begin(), threads.end(), [=, &work] {
auto result = joinable_thread([=] {
std::size_t start = work;
std::size_t last = std::min(n, work + grainsize);
for(std::size_t i = start; i < last; i++)
{
f(i);
}
});
work += grainsize;
return result;
});
assert(work >= n);
}
}
template <class F>
void par_for(std::size_t n, std::size_t min_grain, F f)
{
const auto threadsize =
std::min<std::size_t>(std::thread::hardware_concurrency(), n / min_grain);
par_for_impl(n, threadsize, f);
}
template <class F>
void par_for(std::size_t n, F f)
{
const int min_grain = 8;
par_for(n, min_grain, f);
}
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
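par_for_impl splits n items into contiguous chunks of grainsize = ceil(n / threadsize), one chunk per thread, with the last chunk clamped to n; par_for itself caps threadsize at n / min_grain so no thread gets fewer than min_grain items. A sketch of the resulting partition for n = 10 across 4 threads:

#include <algorithm>
#include <cmath>
#include <cstddef>
#include <iostream>

int main()
{
    const std::size_t n = 10, threads = 4;
    const std::size_t grainsize = std::ceil(static_cast<double>(n) / threads);
    for(std::size_t work = 0; work < n; work += grainsize)
        std::cout << "[" << work << ", " << std::min(n, work + grainsize) << ") ";
    std::cout << "\n"; // prints: [0, 3) [3, 6) [6, 9) [9, 10)
}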
......@@ -105,7 +105,13 @@ struct pass
void apply(program& p) const
{
assert((*this).private_detail_te_handle_mem_var);
return (*this).private_detail_te_get_handle().apply(p);
(*this).private_detail_te_get_handle().apply(p);
}
friend bool is_shared(const pass& private_detail_x, const pass& private_detail_y)
{
return private_detail_x.private_detail_te_handle_mem_var ==
private_detail_y.private_detail_te_handle_mem_var;
}
private:
......@@ -149,7 +155,7 @@ struct pass
std::string name() const override { return private_detail_te_value.name(); }
void apply(program& p) const override { return private_detail_te_value.apply(p); }
void apply(program& p) const override { private_detail_te_value.apply(p); }
PrivateDetailTypeErasedT private_detail_te_value;
};
......
......@@ -91,10 +91,14 @@ struct program
shape get_shape() const;
context& get_context() const;
instruction_ref validate() const;
void compile(const target& t, tracer trace = tracer{});
void finalize();
void perf_report(std::ostream& os, std::size_t n, parameter_map params) const;
void debug_print() const;
......
......@@ -127,6 +127,12 @@ struct target
return (*this).private_detail_te_get_handle().get_context();
}
friend bool is_shared(const target& private_detail_x, const target& private_detail_y)
{
return private_detail_x.private_detail_te_handle_mem_var ==
private_detail_y.private_detail_te_handle_mem_var;
}
private:
struct private_detail_te_handle_base_type
{
......
......@@ -162,25 +162,54 @@ void instruction::replace_argument(instruction_ref old, instruction_ref new_ins)
old->remove_output(*this);
}
std::vector<shape> compute_shapes(const std::vector<instruction_ref>& args)
argument instruction::eval() const
{
std::vector<shape> shapes(args.size());
std::transform(
args.begin(), args.end(), shapes.begin(), [](instruction_ref i) { return i->get_shape(); });
return shapes;
if(op.name() == "@literal")
{
return this->get_literal().get_argument();
}
if(is_context_free(op))
{
std::vector<argument> args;
for(auto&& arg : this->inputs())
{
argument a = arg->eval();
if(a.empty())
return {};
args.push_back(a);
}
return op.compute(result, args);
}
return {};
}
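eval is recursive constant folding: a @literal yields its stored argument, a context-free op folds only if every input folds to a non-empty argument, and anything that needs a context yields an empty argument. An illustration of what does and does not fold, on hypothetical program fragments:

// @literal{2.0f} + @literal{3.0f} via add  -> eval() == argument holding 5.0f
// @param{"x"} + @literal{3.0f} via add     -> eval() == {}  (x is unknown)
// any op whose compute needs a context     -> eval() == {}  (not context-free)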
instruction_ref instruction::get_output_alias(instruction_ref ins)
void instruction::finalize(context& ctx)
{
auto i = ins->get_operator().output_alias(compute_shapes(ins->inputs()));
if(has_finalize(this->op))
this->op.finalize(ctx, this->get_shape(), to_shapes(this->inputs()));
}
instruction_ref instruction::get_output_alias(instruction_ref ins, bool shallow)
{
auto i = ins->get_operator().output_alias(to_shapes(ins->inputs()));
if(i < 0)
return ins;
if(shallow)
return ins->inputs().at(i);
return get_output_alias(ins->inputs().at(i));
}
std::vector<shape> to_shapes(const std::vector<instruction_ref>& args)
{
std::vector<shape> shapes(args.size());
std::transform(
args.begin(), args.end(), shapes.begin(), [](instruction_ref i) { return i->get_shape(); });
return shapes;
}
shape compute_shape(const operation& op, const std::vector<instruction_ref>& args)
{
return op.compute_shape(compute_shapes(args));
return op.compute_shape(to_shapes(args));
}
} // namespace MIGRAPHX_INLINE_NS
......
......@@ -80,6 +80,9 @@ struct onnx_parser
add_mem_op("Unsqueeze", &onnx_parser::parse_unsqueeze);
add_mem_op("Slice", &onnx_parser::parse_slice);
add_mem_op("Concat", &onnx_parser::parse_concat);
add_mem_op("Gather", &onnx_parser::parse_gather);
add_mem_op("Shape", &onnx_parser::parse_shape);
add_mem_op("ConstantFill", &onnx_parser::parse_constant_fill);
add_mem_op("Transpose", &onnx_parser::parse_transpose);
}
......@@ -148,7 +151,7 @@ struct onnx_parser
if(s0->size() > s1->size())
std::swap(s0, s1);
std::vector<std::size_t> output_lens(s1->size());
std::vector<std::size_t> output_lens(*s1);
auto offset = s1->size() - s0->size();
std::transform(s0->begin(),
s0->end(),
......@@ -241,6 +244,10 @@ struct onnx_parser
op.padding_mode = op::convolution::same;
}
}
if(contains(attributes, "group"))
{
op.group = parse_value(attributes.at("group")).at<int>();
}
if(args.size() == 3)
{
uint64_t axis = 1;
......@@ -350,6 +357,18 @@ struct onnx_parser
return prog.add_instruction(op, std::move(args));
}
instruction_ref
parse_gather(const std::string&, attribute_map attributes, std::vector<instruction_ref> args)
{
std::size_t axis = 0;
if(contains(attributes, "axis"))
{
axis = parse_value(attributes.at("axis")).at<int>();
}
op::gather op{axis};
return prog.add_instruction(op, std::move(args));
}
instruction_ref
parse_slice(const std::string&, attribute_map attributes, std::vector<instruction_ref> args)
{
......@@ -382,7 +401,7 @@ struct onnx_parser
parse_gemm(const std::string&, attribute_map attributes, std::vector<instruction_ref> args)
{
float alpha = 1.0f;
float beta = 0.0f;
float beta = 1.0f;
bool transa = false;
bool transb = false;
if(contains(attributes, "alpha"))
......@@ -406,10 +425,20 @@ struct onnx_parser
auto l2 = (transb) ? prog.add_instruction(op::transpose{perm}, args[1]) : args[1];
if(args.size() == 3)
{
uint64_t axis = 1;
auto l3 = prog.add_instruction(op::dot{alpha, beta}, l1, l2);
auto l4 = prog.add_instruction(op::broadcast{axis, l3->get_shape()}, args[2]);
return prog.add_instruction(op::add{}, l3, l4);
if(beta != 0.f)
{
auto l3 = prog.add_instruction(op::dot{alpha}, l1, l2);
auto l4 = args[2];
if(l4->get_shape().scalar()) // ignore args[2] (no C value added to alpha*A*B)
return l3;
if(beta != 1.f)
{
auto beta_val = prog.add_literal(beta);
auto l5 = prog.add_instruction(op::scalar{args[2]->get_shape()}, beta_val);
l4 = prog.add_instruction(op::mul{}, args[2], l5);
}
return add_broadcastable_binary_op(l3, l4, op::add{});
}
}
return prog.add_instruction(op::dot{alpha, beta}, l1, l2);
}
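ONNX Gemm computes Y = alpha*A*B + beta*C, and the ONNX default for beta is 1, not the 0 used before; the rewrite also folds beta into C with an explicit scalar multiply and drops C entirely when beta == 0. A numeric check of the decomposition on plain floats (a sketch, not the parser code):

#include <cassert>

int main()
{
    const float alpha = 2.0f, beta = 0.5f;
    const float a = 3.0f, b = 4.0f, c = 10.0f;
    const float l3 = alpha * (a * b); // op::dot{alpha} -> alpha*A*B
    const float l4 = beta * c;        // scalar-broadcast beta times args[2]
    assert(l3 + l4 == 29.0f);         // add_broadcastable_binary_op(..., op::add{})
}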
......@@ -509,6 +538,99 @@ struct onnx_parser
return prog.add_instruction(migraphx::op::transpose{perm}, args.front());
}
// Use a literal instruction to replace the shape operator, since the output of
// the shape operator is a literal in migraphx
instruction_ref
parse_shape(const std::string&, const attribute_map&, std::vector<instruction_ref> args)
{
if(args.size() != 1)
MIGRAPHX_THROW("Shape: operator should have 1 operand");
std::vector<std::size_t> arg_shape = args[0]->get_shape().lens();
std::vector<int64_t> vec_shape(arg_shape.size());
migraphx::shape s(migraphx::shape::int64_type, {arg_shape.size()});
std::transform(arg_shape.begin(), arg_shape.end(), vec_shape.begin(), [](auto i) {
return int64_t(i);
});
return prog.add_literal(migraphx::literal{s, vec_shape});
}
// Use a literal instruction to replace the ConstantFill operator. In RNN, the input
// shape and value are fixed, so there is no need to do the actual computation for the
// ConstantFill operator
instruction_ref parse_constant_fill(const std::string&,
attribute_map attributes,
std::vector<instruction_ref> args)
{
int input_as_shape = 0;
int dtype = 1;
float value = 0.0f;
if(contains(attributes, "dtype"))
{
dtype = parse_value(attributes.at("dtype")).at<int>();
}
migraphx::shape::type_t type = get_type(dtype);
if(contains(attributes, "input_as_shape"))
{
input_as_shape = parse_value(attributes.at("input_as_shape")).at<int>();
}
if(contains(attributes, "value"))
{
value = parse_value(attributes.at("value")).at<float>();
}
if(contains(attributes, "extra_shape"))
{
MIGRAPHX_THROW("ConstantFill: cannot handle extra shape attribute");
}
if(input_as_shape == 1)
{
if(args.size() != 1)
{
MIGRAPHX_THROW("ConstantFill: need an input argument as output shape");
}
if(contains(attributes, "shape"))
{
MIGRAPHX_THROW("ConstantFill: cannot set the shape argument and pass in an input "
"at the same time");
}
migraphx::argument in = args[0]->eval();
if(in.empty())
{
MIGRAPHX_THROW("ConstantFill: cannot handle dynamic shape as input");
}
std::vector<std::size_t> dims;
in.visit([&](auto input) { dims.assign(input.begin(), input.end()); });
migraphx::shape s(type, dims);
std::vector<float> values(s.elements(), value);
return prog.add_literal(migraphx::literal(s, values));
}
else if(input_as_shape == 0)
{
if(!contains(attributes, "shape"))
{
MIGRAPHX_THROW("ConstantFill: attribute output shape is needed");
}
literal ls = parse_value(attributes.at("shape"));
std::vector<std::size_t> dims;
ls.visit([&](auto s) { dims.assign(s.begin(), s.end()); });
migraphx::shape s{type, dims};
std::vector<float> values(s.elements(), value);
return prog.add_literal(migraphx::literal(s, values));
}
else
{
MIGRAPHX_THROW("ConstantFill: wrong value of attribute input_as_shape");
}
}
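With input_as_shape == 1 the operator's single input is evaluated at parse time (via instruction::eval above) and read as the output dims; with input_as_shape == 0 the dims come from the shape attribute. Either way the result is a literal filled with value. A minimal sketch of the fill itself (constant_fill is a hypothetical helper):

#include <cstddef>
#include <functional>
#include <numeric>
#include <vector>

// Build the flat data for a tensor of the given dims filled with one value.
std::vector<float> constant_fill(const std::vector<std::size_t>& dims, float value)
{
    auto elements = std::accumulate(
        dims.begin(), dims.end(), std::size_t{1}, std::multiplies<>{});
    return std::vector<float>(elements, value);
}

// constant_fill({2, 3}, 1.5f) -> six elements, all 1.5f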
void parse_from(std::istream& is)
{
onnx::ModelProto model;
......@@ -758,6 +880,28 @@ struct onnx_parser
});
return {shape_type, dims};
}
shape::type_t get_type(int dtype)
{
switch(dtype)
{
case 1: return shape::float_type;
case 2: return shape::uint8_type;
case 3: return shape::int8_type;
case 4: return shape::uint16_type;
case 5: return shape::int16_type;
case 6: return shape::int32_type;
case 7: return shape::int64_type;
case 10: return shape::half_type;
case 11: return shape::double_type;
case 12: return shape::uint32_type;
case 13: return shape::uint64_type;
default:
{
MIGRAPHX_THROW("Prototensor data type " + std::to_string(dtype) + " not supported");
}
}
}
};
program parse_onnx(const std::string& name)
......
......@@ -271,6 +271,8 @@ instruction_ref program::end() const { return impl->instructions.end(); }
shape program::get_shape() const { return impl->instructions.back().get_shape(); }
context& program::get_context() const { return impl->ctx; }
instruction_ref program::validate() const
{
return std::find_if(impl->instructions.begin(),
......@@ -309,6 +311,15 @@ void program::compile(const target& t, tracer trace)
auto index = std::distance(impl->instructions.begin(), invalid);
MIGRAPHX_THROW("Invalid program from compilation at instruction " + std::to_string(index));
}
this->finalize();
}
void program::finalize()
{
for(auto ins : iterator_for(*this))
{
ins->finalize(this->impl->ctx);
}
}
template <class F>
......
......@@ -9,7 +9,18 @@
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
bool is_reshaper(const std::string& name)
// Reshapers that can't handle nonstandard input shapes
bool is_nonstandard_reshaper(instruction_ref ins)
{
// clang-format off
static const std::unordered_set<std::string> names = {
"reshape"
};
// clang-format on
return contains(names, ins->name()) and ins->inputs().front()->name() == "contiguous";
}
bool is_reshaper(instruction_ref ins)
{
// clang-format off
static const std::unordered_set<std::string> names = {
......@@ -19,26 +30,27 @@ bool is_reshaper(const std::string& name)
"contiguous"
};
// clang-format on
return contains(names, name);
return contains(names, ins->name()) and not is_nonstandard_reshaper(ins);
}
void simplify_reshapes::apply(program& p) const
{
for(auto ins : iterator_for(p))
{
if(not is_reshaper(ins->name()))
if(not is_reshaper(ins))
continue;
if(ins->outputs().size() != 1)
continue;
if(is_reshaper(ins->outputs().front()->name()))
if(is_reshaper(ins->outputs().front()))
continue;
// Gather reshapes
std::vector<instruction_ref> reshapes{ins};
while(is_reshaper(reshapes.back()->name()))
while(is_reshaper(reshapes.back()))
{
assert(!reshapes.back()->inputs().empty());
assert(p.has_instruction(reshapes.back()->inputs().front()));
reshapes.push_back(reshapes.back()->inputs().front());
auto input = reshapes.back()->inputs().front();
reshapes.push_back(input);
}
std::pair<instruction_ref, instruction_ref> r{p.end(), p.end()};
......@@ -58,6 +70,13 @@ void simplify_reshapes::apply(program& p) const
p.replace_instruction(r.first, r.second);
}
}
// Replace all reshapes with as_shape
for(auto ins : iterator_for(p))
{
if(ins->name() != "reshape")
continue;
p.replace_instruction(ins, op::as_shape{ins->get_shape()}, ins->inputs());
}
}
} // namespace MIGRAPHX_INLINE_NS
......