Commit ef5d7092 authored by Shucai Xiao's avatar Shucai Xiao
Browse files

Merge branch 'develop' of https://github.com/ROCmSoftwarePlatform/AMDMIGraphX...

Merge branch 'develop' of https://github.com/ROCmSoftwarePlatform/AMDMIGraphX into refine_eliminate_contiguous
parents 2e04ea47 b2051bbc
......@@ -57,7 +57,7 @@ struct transpose
{
return {std::move(output_shape), std::move(args.front().data)};
}
int output_alias(const std::vector<shape>&) const { return 0; }
std::ptrdiff_t output_alias(const std::vector<shape>&) const { return 0; }
};
} // namespace op
......
......@@ -56,7 +56,7 @@ struct unsqueeze
{
return {std::move(output_shape), std::move(args.front().data)};
}
int output_alias(const std::vector<shape>&) const { return 0; }
std::ptrdiff_t output_alias(const std::vector<shape>&) const { return 0; }
};
} // namespace op
......
......@@ -49,7 +49,7 @@ struct operation
argument compute(context& ctx, const shape& output, const std::vector<argument>& input) const;
/// An optional method to return which argument the output will alias. If
/// there is no aliased output then -1 can be returned.
int output_alias(const std::vector<shape>& input) const;
std::ptrdiff_t output_alias(const std::vector<shape>& input) const;
/// An optional stream operator to print the operation. When this is not
/// implemented, it will just print the operation's name.
friend std::ostream& operator<<(std::ostream& os, const operation& op);
......@@ -175,7 +175,7 @@ auto is_context_free_op(const T& x) -> decltype(is_context_free_op(
}
template <class T>
int output_alias_op(rank<0>, const T&, const std::vector<shape>&)
std::ptrdiff_t output_alias_op(rank<0>, const T&, const std::vector<shape>&)
{
return -1;
}
......@@ -188,7 +188,7 @@ auto output_alias_op(rank<1>, const T& x, const std::vector<shape>& shapes)
}
template <class T>
int output_alias_op(const T& x, const std::vector<shape>& shapes)
std::ptrdiff_t output_alias_op(const T& x, const std::vector<shape>& shapes)
{
return output_alias_op(rank<1>{}, x, shapes);
}
......@@ -239,7 +239,7 @@ auto has_finalize_op(const T&) -> decltype(has_finalize_op(rank<1>{},
* std::string name() const;
* bool is_context_free() const;
* bool has_finalize() const;
* int output_alias(const std::vector<shape>& input) const;
* std::ptrdiff_t output_alias(const std::vector<shape>& input) const;
* void finalize(context& ctx,const shape& output,const std::vector<shape>& input) ;
* shape compute_shape(const std::vector<shape>& input) const;
* argument compute(context& ctx,const shape& output,const std::vector<argument>& input) const;
......@@ -325,7 +325,7 @@ struct operation
return (*this).private_detail_te_get_handle().has_finalize();
}
int output_alias(const std::vector<shape>& input) const
std::ptrdiff_t output_alias(const std::vector<shape>& input) const
{
assert((*this).private_detail_te_handle_mem_var);
return (*this).private_detail_te_get_handle().output_alias(input);
......@@ -380,10 +380,10 @@ struct operation
virtual std::shared_ptr<private_detail_te_handle_base_type> clone() const = 0;
virtual const std::type_info& type() const = 0;
virtual std::string name() const = 0;
virtual bool is_context_free() const = 0;
virtual bool has_finalize() const = 0;
virtual int output_alias(const std::vector<shape>& input) const = 0;
virtual std::string name() const = 0;
virtual bool is_context_free() const = 0;
virtual bool has_finalize() const = 0;
virtual std::ptrdiff_t output_alias(const std::vector<shape>& input) const = 0;
virtual void
finalize(context& ctx, const shape& output, const std::vector<shape>& input) = 0;
virtual shape compute_shape(const std::vector<shape>& input) const = 0;
......@@ -432,7 +432,7 @@ struct operation
bool has_finalize() const override { return has_finalize_op(private_detail_te_value); }
int output_alias(const std::vector<shape>& input) const override
std::ptrdiff_t output_alias(const std::vector<shape>& input) const override
{
return output_alias_op(private_detail_te_value, input);
......
......@@ -38,8 +38,9 @@ inline std::string join_strings(Strings strings, const std::string& delim)
return "";
auto nit = std::next(it);
return std::accumulate(
nit, strings.end(), *it, [&](std::string x, std::string y) { return x + delim + y; });
return std::accumulate(nit, strings.end(), *it, [&](std::string x, std::string y) {
return std::move(x) + delim + std::move(y);
});
}
template <class F>
......
......@@ -162,7 +162,24 @@ void instruction::replace_argument(instruction_ref old, instruction_ref new_ins)
old->remove_output(*this);
}
argument instruction::eval() const
bool instruction::can_eval() const
{
if(op.name() == "@literal")
{
return true;
}
else if(is_context_free(op))
{
return std::all_of(
this->inputs().begin(), this->inputs().end(), [](auto arg) { return arg->can_eval(); });
}
else
{
return false;
}
}
argument instruction::eval(bool check_eval) const
{
if(op.name() == "@literal")
{
......@@ -170,14 +187,13 @@ argument instruction::eval() const
}
if(is_context_free(op))
{
if(check_eval and not this->can_eval())
return {};
std::vector<argument> args;
for(auto&& arg : this->inputs())
{
argument a = arg->eval();
if(a.empty())
return {};
args.push_back(a);
}
std::transform(this->inputs().begin(),
this->inputs().end(),
std::back_inserter(args),
[](auto arg) { return arg->eval(false); });
return op.compute(result, args);
}
return {};
......
......@@ -32,7 +32,7 @@ auto read_cifar10_images(const std::string& full_path)
labels[i] = *pimage++;
for(size_t j = 0; j < nbytes_per_image; j++)
{
float v = *(pimage + j) / 255.0f;
float v = float(*(pimage + j)) / 255.0f;
data[i * nbytes_per_image + j] = v;
}
}
......
......@@ -207,7 +207,7 @@ struct onnx_parser
template <class T>
void add_generic_op(std::string name, T x)
{
add_op(name, [this, x](attribute_map, std::vector<instruction_ref> args) {
add_op(name, [this, x](const attribute_map&, std::vector<instruction_ref> args) {
return prog.add_instruction(x, args);
});
}
......@@ -215,7 +215,7 @@ struct onnx_parser
template <class T>
void add_variadic_op(std::string name, T x)
{
add_op(name, [this, x](attribute_map, std::vector<instruction_ref> args) {
add_op(name, [this, x](const attribute_map&, std::vector<instruction_ref> args) {
return std::accumulate(std::next(args.begin()),
args.end(),
args.front(),
......
......@@ -63,11 +63,11 @@ bool memory_coloring_impl::allocate(interval_ptr interval)
}
}
long long offset = 0;
std::size_t offset = 0;
while(!conflict_queue.empty())
{
live_range* range = conflict_queue.top();
long long iter_offset = range->offset;
live_range* range = conflict_queue.top();
std::size_t iter_offset = range->offset;
if(offset > iter_offset)
{
offset = std::max(offset, iter_offset + range->size);
......@@ -97,7 +97,7 @@ void memory_coloring_impl::build()
if(num_of_instrs == 0)
return;
int cur_points = num_of_instrs * 2;
auto cur_points = num_of_instrs * 2;
instruction_ref iter = p_program->end();
instruction_ref begin = p_program->begin();
std::vector<instruction_ref> dead_instrs;
......@@ -193,13 +193,13 @@ void memory_coloring_impl::rewrite()
continue;
std::size_t offset = 0;
if(interval->get_offset() == invalid_offset)
if(interval->get_offset() != invalid_offset)
{
assert(interval->result.bytes() == 0);
offset = interval->get_offset();
}
else
{
offset = interval->get_offset();
assert(interval->result.bytes() == 0);
}
if(is_allocate(ins))
......@@ -207,15 +207,6 @@ void memory_coloring_impl::rewrite()
p_program->replace_instruction(
ins, op::load{ins->get_shape(), offset}, scratch_param);
}
else if(is_literal(ins))
{
#if 0
auto pre = p_program->add_literal(ins->lit);
bool pre_copy = (interval->get_begin() < earliest_end_point);
p_program->replace_instruction(
ins, write_literal{offset, pre_copy}, scratch_param, pre);
#endif
}
}
}
MIGRAPHX_DEBUG(dump("---After rewrite---"));
......
......@@ -21,15 +21,15 @@
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
static const int invalid_offset = -1;
static const std::size_t invalid_offset = std::numeric_limits<std::size_t>::max();
struct live_range
{
int begin; // begin point in the instruction stream.
int end; // end point in the instruction stream.
long long offset; // offset to base pointer of allocated memory trunk.
int vn; // value number that identifies this live_range.
long long size; // size of required memory in bytes
std::size_t begin; // begin point in the instruction stream.
std::size_t end; // end point in the instruction stream.
std::size_t offset; // offset to base pointer of allocated memory trunk.
std::size_t vn; // value number that identifies this live_range.
std::size_t size; // size of required memory in bytes
#ifdef MIGRAPHX_DEBUG_OPT
void dump();
#endif
......@@ -45,9 +45,9 @@ struct live_interval
is_live_on_entry = false;
}
void add_use(int use) { use_points.push_front(use); }
int get_begin() const { return segment.begin; }
int get_end() const { return segment.end; }
void add_use(std::size_t use) { use_points.push_front(use); }
std::size_t get_begin() const { return segment.begin; }
std::size_t get_end() const { return segment.end; }
long long get_offset() const { return segment.offset; }
#ifdef MIGRAPHX_DEBUG_OPT
......@@ -55,9 +55,9 @@ struct live_interval
#endif
live_range segment;
int id;
std::list<int> use_points;
int def_point;
std::size_t id;
std::list<std::size_t> use_points;
std::size_t def_point;
shape result;
bool is_literal;
bool is_live_on_entry;
......@@ -111,8 +111,8 @@ struct memory_coloring_impl
{
if((range1.size == 0) || (range2.size == 0))
return false;
long long end1 = range1.offset + range1.size - 1;
long long end2 = range2.offset + range2.size - 1;
auto end1 = range1.offset + range1.size - 1;
auto end2 = range2.offset + range2.size - 1;
return ((end1 < range2.offset) || (end2 < range1.offset));
}
void verify();
......@@ -125,8 +125,8 @@ struct memory_coloring_impl
{
bool operator()(const interval_ptr i1, const interval_ptr i2) const
{
int len1 = i1->get_end() - i1->get_begin();
int len2 = i2->get_end() - i2->get_begin();
auto len1 = i1->get_end() - i1->get_begin();
auto len2 = i2->get_end() - i2->get_begin();
if(len1 != len2)
{
return (len1 < len2);
......@@ -158,7 +158,7 @@ struct memory_coloring_impl
int num_of_lives;
int max_value_number;
long long required_bytes;
std::size_t required_bytes;
// The earliest program point where an live interval ends.
int earliest_end_point;
// The latest program point where an live interval ends.
......
......@@ -22,22 +22,32 @@ bool skip_propogate(instruction_ref ins)
void propagate_constant::apply(program& p) const
{
fix([&](auto self, auto ins) {
if(not skip_propogate(ins))
{
auto r = ins->eval();
if(not r.empty())
for(auto i : iterator_for(p))
{
if(i->name() != "@literal")
continue;
if(i->outputs().empty())
continue;
fix([&](auto self, auto ins) {
std::unordered_set<instruction_ref> children(ins->outputs().begin(),
ins->outputs().end());
for(auto child : children)
{
assert(r.get_shape() == ins->get_shape());
auto l = p.add_literal(r.get_shape(), r.data());
p.replace_instruction(ins, l);
return;
if(skip_propogate(child))
{
self(child);
continue;
}
auto r = child->eval();
if(not r.empty())
{
assert(r.get_shape() == child->get_shape());
auto l = p.add_literal(r.get_shape(), r.data());
self(p.replace_instruction(child, l));
}
}
}
std::unordered_set<instruction_ref> children(ins->inputs().begin(), ins->inputs().end());
for(auto child : children)
self(child);
})(std::prev(p.end()));
})(i);
}
}
} // namespace MIGRAPHX_INLINE_NS
......
......@@ -117,7 +117,7 @@ struct cpu_lrn
int channels = output_shape.lens()[1];
int height = output_shape.lens()[2];
int width = output_shape.lens()[3];
float alphaoverarea = op.alpha / op.size;
float alphaoverarea = op.alpha / float(op.size);
int radius = (op.size - 1) / 2;
par_dfor(n_batch, height, width)([&](int b, int h, int w) {
......@@ -165,15 +165,15 @@ struct cpu_convolution
output_shape.lens()[2],
output_shape.lens()[3])(
[&](std::size_t o, std::size_t w, std::size_t i, std::size_t j) {
const int start_x = i * op.stride[0] - op.padding[0];
const int start_y = j * op.stride[1] - op.padding[1];
const int group_id = w / (wei_n / op.group);
const auto start_x = i * op.stride[0] - op.padding[0];
const auto start_y = j * op.stride[1] - op.padding[1];
const auto group_id = w / (wei_n / op.group);
double acc = 0;
dfor(wei_c, wei_h, wei_w)([&](std::size_t k, std::size_t x, std::size_t y) {
const int in_x = start_x + x;
const int in_y = start_y + y;
const int in_ch = group_id * wei_c + k;
const auto in_x = start_x + x;
const auto in_y = start_y + y;
const auto in_ch = group_id * wei_c + k;
if(in_x >= 0 && in_x < in_h && in_y >= 0 && in_y < in_w)
{
acc += input(o, in_ch, in_x, in_y) * weights(w, k, x, y);
......@@ -209,10 +209,8 @@ struct cpu_im2col
const std::size_t& stride_h = op.stride[0];
const std::size_t& stride_w = op.stride[1];
int kdiv2_h;
int kdiv2_w;
kdiv2_h = kernel_h / 2;
kdiv2_w = kernel_w / 2;
auto kdiv2_h = kernel_h / 2;
auto kdiv2_w = kernel_w / 2;
// calculate output sizes
const std::size_t col_height = (height - kernel_h + 2 * pad_h) / stride_h + 1;
const std::size_t col_width = (width - kernel_w + 2 * pad_w) / stride_w + 1;
......@@ -230,8 +228,8 @@ struct cpu_im2col
dfor(channels,
kernel_h,
kernel_w)([&](std::size_t c, std::size_t koffset, std::size_t loffset) {
int idx = iinput + koffset - kdiv2_h;
int jdx = jinput + loffset - kdiv2_w;
auto idx = iinput + koffset - kdiv2_h;
auto jdx = jinput + loffset - kdiv2_w;
col(ldx, p) = ((idx >= 0) && (idx < height) && (jdx >= 0) && (jdx < width))
? input(0, c, idx, jdx)
: 0;
......@@ -642,20 +640,20 @@ struct softmax2d
auto nw = input.get_shape().lens()[3];
dfor(nb, nh, nw)([&](std::size_t b, std::size_t i, std::size_t j) {
value_type cmax = std::numeric_limits<value_type>::lowest();
for(int c = 0; c < nc; c++)
for(std::size_t c = 0; c < nc; c++)
{
cmax = std::max(cmax, input(b, c, i, j));
}
for(int c = 0; c < nc; c++)
for(std::size_t c = 0; c < nc; c++)
{
output(b, c, i, j) = std::exp(input(b, c, i, j) - cmax);
}
value_type sum = value_type(0);
for(int c = 0; c < nc; c++)
for(std::size_t c = 0; c < nc; c++)
{
sum += output(b, c, i, j);
}
for(int c = 0; c < nc; c++)
for(std::size_t c = 0; c < nc; c++)
{
output(b, c, i, j) = output(b, c, i, j) / sum;
}
......
......@@ -16,7 +16,7 @@ argument gather(hipStream_t stream,
std::vector<migraphx::argument> args,
int axis)
{
int axis_index = (axis < 0) ? (axis + args[0].get_shape().lens().size()) : axis;
auto axis_index = (axis < 0) ? (axis + args[0].get_shape().lens().size()) : axis;
visit_all(args.back(), args[0])([&](auto output, auto input) {
std::size_t nelements = output_shape.elements();
args[1].visit([&](auto indices) {
......
......@@ -162,7 +162,10 @@ struct hip_triadd
device::add(ctx.get_stream().get(), args.at(3), args.at(0), args.at(1), args.at(2));
return args.at(3);
}
int output_alias(const std::vector<shape>& shapes) const { return shapes.size() - 1; }
std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
{
return shapes.size() - 1;
}
};
struct hip_triadd_relu
......@@ -178,7 +181,10 @@ struct hip_triadd_relu
device::add_relu(ctx.get_stream().get(), args.at(3), args.at(0), args.at(1), args.at(2));
return args.at(3);
}
int output_alias(const std::vector<shape>& shapes) const { return shapes.size() - 1; }
std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
{
return shapes.size() - 1;
}
};
struct hip_add_relu
......@@ -194,7 +200,10 @@ struct hip_add_relu
device::add_relu(ctx.get_stream().get(), args.at(2), args.at(0), args.at(1));
return args.at(2);
}
int output_alias(const std::vector<shape>& shapes) const { return shapes.size() - 1; }
std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
{
return shapes.size() - 1;
}
};
struct find_add_relu
......@@ -285,7 +294,10 @@ struct miopen_conv_bias
void finalize(context& ctx, const shape&, const std::vector<shape>&) { f.compile(ctx); }
shape get_workspace(context& ctx) { return f.get_workspace(ctx); }
int output_alias(const std::vector<shape>& shapes) const { return shapes.size() - 1; }
std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
{
return shapes.size() - 1;
}
};
struct miopen_conv_bias_relu
......@@ -332,7 +344,10 @@ struct miopen_conv_bias_relu
}
void finalize(context& ctx, const shape&, const std::vector<shape>&) { f.compile(ctx); }
shape get_workspace(context& ctx) { return f.get_workspace(ctx); }
int output_alias(const std::vector<shape>& shapes) const { return shapes.size() - 1; }
std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
{
return shapes.size() - 1;
}
};
template <class... Ms>
......
......@@ -17,7 +17,10 @@ struct miopen_abs
shape compute_shape(const std::vector<shape>& inputs) const;
argument
compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const;
int output_alias(const std::vector<shape>& shapes) const { return shapes.size() - 1; }
std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
{
return shapes.size() - 1;
}
};
} // namespace gpu
......
......@@ -17,7 +17,10 @@ struct miopen_batch_norm_inference
shape compute_shape(const std::vector<shape>& inputs) const;
argument
compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const;
int output_alias(const std::vector<shape>& shapes) const { return shapes.size() - 1; }
std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
{
return shapes.size() - 1;
}
};
} // namespace gpu
......
......@@ -18,7 +18,10 @@ struct hip_concat
shape compute_shape(std::vector<shape> inputs) const;
argument
compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const;
int output_alias(const std::vector<shape>& shapes) const { return shapes.size() - 1; }
std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
{
return shapes.size() - 1;
}
};
} // namespace gpu
......
......@@ -16,7 +16,10 @@ struct miopen_contiguous
std::string name() const { return "gpu::contiguous"; }
shape compute_shape(const std::vector<shape>& inputs) const;
argument compute(context&, shape output_shape, const std::vector<argument>& args) const;
int output_alias(const std::vector<shape>& shapes) const { return shapes.size() - 1; }
std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
{
return shapes.size() - 1;
}
};
} // namespace gpu
......
......@@ -31,7 +31,10 @@ struct miopen_convolution
compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const;
shape compile(context& ctx, const shape& output_shape, std::vector<shape> inputs);
void finalize(context& ctx, const shape& output_shape, std::vector<shape> inputs);
int output_alias(const std::vector<shape>& shapes) const { return shapes.size() - 1; }
std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
{
return shapes.size() - 1;
}
};
} // namespace gpu
......
......@@ -17,7 +17,10 @@ struct miopen_elu
shape compute_shape(const std::vector<shape>& inputs) const;
argument
compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const;
int output_alias(const std::vector<shape>& shapes) const { return shapes.size() - 1; }
std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
{
return shapes.size() - 1;
}
};
} // namespace gpu
......
......@@ -18,7 +18,10 @@ struct hip_gather
shape compute_shape(std::vector<shape> inputs) const;
argument
compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const;
int output_alias(const std::vector<shape>& shapes) const { return shapes.size() - 1; }
std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
{
return shapes.size() - 1;
}
};
} // namespace gpu
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment