Commit 3d08f40f authored by Shucai Xiao
Browse files

Merge branch 'develop' of github.com:ROCmSoftwarePlatform/AMDMIGraphX into scatter-op

parents 50cfbcda 6ba279cc
#include <migraphx/gpu/preallocate_param.hpp>
#include <migraphx/gpu/hip.hpp>
#include <migraphx/program.hpp>
#include <migraphx/preallocate_param.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/module.hpp>
#include <migraphx/make_op.hpp>
#include <migraphx/iterator_for.hpp>
#include <migraphx/ranges.hpp>
#include <migraphx/stringutils.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
void preallocate_param::apply(module& p) const
void preallocate_param::apply(module& m) const
{
for(auto ins : iterator_for(p))
for(auto ins : iterator_for(m))
{
if(ins->name() != "@param")
continue;
if(param != any_cast<builtin::param>(ins->get_operator()).parameter)
continue;
std::string id = p.name() + ":" + param;
auto r = p.insert_instruction(ins, hip_allocate_memory{ins->get_shape(), id});
p.replace_instruction(ins, r);
std::string id = m.name() + ":" + param;
auto r = m.insert_instruction(ins, model.preallocate(ins->get_shape(), id));
m.replace_instruction(ins, r);
}
}
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
......@@ -19,6 +19,7 @@ add_library(migraphx_cpu
logsoftmax.cpp
lowering.cpp
lrn.cpp
preallocate.cpp
pooling.cpp
reduction.cpp
reorder.cpp
......
......@@ -11,6 +11,11 @@ operation cpu_allocation_model::allocate(const shape& s) const
return make_op(name(), {{"shape", to_value(s)}});
}
// Construct the cpu target's preallocation operation for a buffer of the
// given shape, tagged with the identifier `ident`.
operation cpu_allocation_model::preallocate(const shape& buffer_shape,
                                            const std::string& ident) const
{
    return make_op("cpu::preallocate",
                   {{"shape", to_value(buffer_shape)}, {"id", ident}});
}
// Name of the operation used to copy buffers on the cpu target.
std::string cpu_allocation_model::copy() const
{
    return "cpu::copy";
}
} // namespace cpu
......
......@@ -14,6 +14,7 @@ struct cpu_allocation_model
std::string name() const;
std::string copy() const;
operation allocate(const shape& s) const;
operation preallocate(const shape& s, const std::string& id) const;
};
} // namespace cpu
......
......@@ -440,7 +440,7 @@ struct cpu_apply
}
}
instruction_ref apply_pow(instruction_ref ins)
instruction_ref apply_pow(instruction_ref ins) const
{
auto beta = read_scalar<float>(ins->inputs()[1]);
if(beta.empty())
......@@ -451,7 +451,7 @@ struct cpu_apply
{ins->inputs().front()});
}
instruction_ref apply_pooling(instruction_ref ins)
instruction_ref apply_pooling(instruction_ref ins) const
{
auto&& op = ins->get_operator();
auto v = op.to_value();
......@@ -479,30 +479,20 @@ struct cpu_apply
return {r.at<T>()};
}
// Replace `ins` with `op`, reusing the instruction's own inputs; an output
// allocation is appended by the three-argument overload below.
// (The source contained both old and new diff signatures for these two
// overloads; resolved here to the merged const-qualified versions.)
instruction_ref replace(instruction_ref ins, const operation& op) const
{
    return replace(ins, op, ins->inputs());
}

// Replace `ins` with `op` applied to `inputs`. A freshly inserted allocation
// for the result shape is appended as the trailing (output) argument before
// the instruction is rewritten in the module.
instruction_ref
replace(instruction_ref ins, const operation& op, std::vector<instruction_ref> inputs) const
{
    inputs.push_back(insert_allocation(ins, ins->get_shape()));
    return modl->replace_instruction(ins, op, inputs);
}
// Insert a "cpu::allocate" instruction for shape `s` just before `ins` and
// return it. Output parameters are no longer special-cased here; parameter
// buffers are handled by the later passes (memory_coloring over
// "cpu::allocate" and preallocate_param in the target's pass list).
// (The source contained the removed old body alongside the new one; resolved
// here to the merged version.)
instruction_ref insert_allocation(instruction_ref ins, const shape& s) const
{
    return modl->insert_instruction(ins, make_op("cpu::allocate", {{"shape", to_value(s)}}));
}
};
......
#include <migraphx/config.hpp>
#include <migraphx/check_shapes.hpp>
#include <migraphx/argument.hpp>
#include <migraphx/context.hpp>
#include <migraphx/cpu/context.hpp>
#include <migraphx/register_op.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace cpu {
// Operation that owns a preallocated host buffer.
//
// The buffer is created once in finalize() and the same argument is returned
// by every compute() call, so repeated evaluations reuse one allocation.
struct cpu_preallocate : auto_register_op<cpu_preallocate>
{
    shape s;              // shape of the buffer to allocate
    std::string id = "";  // identifier naming this buffer
    argument data;        // the allocated buffer; filled in by finalize()

    // Reflect only shape and id; `data` is runtime state, not an attribute.
    template <class Self, class F>
    static auto reflect(Self& self, F f)
    {
        return pack(f(self.s, "shape"), f(self.id, "id"));
    }
    std::string name() const { return "cpu::preallocate"; }
    // Takes no inputs; the output shape is the configured buffer shape.
    shape compute_shape(const std::vector<shape>& inputs) const
    {
        check_shapes{inputs, *this}.has(0);
        return s;
    }
    // Hand out the buffer allocated during finalize().
    argument compute(context&, const shape&, const std::vector<argument>&) const { return data; }
    // Allocate the buffer once, before execution.
    void finalize(context&, const shape&, const std::vector<shape>&) { data = argument(s); }
    // Report global lifetime for the returned buffer.
    lifetime get_lifetime() const { return lifetime::global; }
};
} // namespace cpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
......@@ -22,6 +22,7 @@
#include <migraphx/memory_coloring.hpp>
#include <migraphx/simplify_algebra.hpp>
#include <migraphx/simplify_reshapes.hpp>
#include <migraphx/preallocate_param.hpp>
#include <migraphx/cpu/fuse_ops.hpp>
#include <migraphx/cpu/write_literals.hpp>
#include <migraphx/cpu/allocation_model.hpp>
......@@ -76,6 +77,8 @@ std::vector<pass> target::get_passes(migraphx::context& gctx, const compile_opti
write_literals{},
dead_code_elimination{},
memory_coloring{"cpu::allocate"},
dead_code_elimination{},
preallocate_param{"scratch", cpu_allocation_model{}},
dead_code_elimination{}};
}
......
......@@ -61,6 +61,7 @@ add_library(migraphx_device
device/pad.cpp
device/pow.cpp
device/prelu.cpp
device/prefix_scan_sum.cpp
device/recip.cpp
device/reduce_max.cpp
device/reduce_mean.cpp
......@@ -140,7 +141,6 @@ add_library(migraphx_gpu
pack_int8_args.cpp
pad.cpp
pooling.cpp
preallocate_param.cpp
quant_convolution.cpp
reverse.cpp
rnn_variable_seq_lens.cpp
......@@ -194,6 +194,7 @@ register_migraphx_gpu_ops(hip_
pad
pow
prelu
prefix_scan_sum
recip
reduce_max
reduce_mean
......
......@@ -11,6 +11,11 @@ operation gpu_allocation_model::allocate(const shape& s) const
return make_op(name(), {{"shape", to_value(s)}});
}
// Construct the gpu target's preallocation operation: a hip memory
// allocation of the given shape, tagged with the identifier `ident`.
operation gpu_allocation_model::preallocate(const shape& buffer_shape,
                                            const std::string& ident) const
{
    return make_op("hip::hip_allocate_memory",
                   {{"shape", to_value(buffer_shape)}, {"id", ident}});
}
// Name of the operation used to copy buffers on the gpu target.
std::string gpu_allocation_model::copy() const
{
    return "hip::copy";
}
} // namespace gpu
......
......@@ -5,6 +5,7 @@
#include <migraphx/gpu/device/launch.hpp>
#include <migraphx/gpu/device/visit.hpp>
#include <migraphx/gpu/device/multi_index.hpp>
#include <migraphx/gpu/device/reduce_ops.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
......@@ -15,79 +16,6 @@ namespace device {
#define MIGRAPHX_NO_DPP
#endif
// NOTE(review): the functor definitions below duplicate those in
// <migraphx/gpu/device/reduce_ops.hpp>, which this file includes above; the
// header copies are the canonical ones after the merge.

// Binary reducer: addition.
struct sum
{
    template <class T, class U>
    MIGRAPHX_DEVICE_CONSTEXPR auto operator()(T x, U y) const
    {
        return x + y;
    }
};
// Binary reducer: multiplication.
struct product
{
    template <class T, class U>
    MIGRAPHX_DEVICE_CONSTEXPR auto operator()(T x, U y) const
    {
        return x * y;
    }
};
// Unary post-op: identity.
struct id
{
    template <class T>
    MIGRAPHX_DEVICE_CONSTEXPR auto operator()(T x) const
    {
        return x;
    }
};
// Unary post-op: divide by the element count to turn a sum into a mean.
struct mean
{
    size_t item_num = 1;
    template <class T>
    MIGRAPHX_DEVICE_CONSTEXPR auto operator()(T x) const
    {
        return x / static_cast<T>(item_num);
    }
};
// Binary reducer: larger of the two values.
struct max
{
    template <class T, class U>
    MIGRAPHX_DEVICE_CONSTEXPR auto operator()(T x, U y) const
    {
        return (x > y) ? x : y;
    }
};
// Binary reducer: smaller of the two values.
struct min
{
    template <class T, class U>
    MIGRAPHX_DEVICE_CONSTEXPR auto operator()(T x, U y) const
    {
        return (x < y) ? x : y;
    }
};
// Converts implicitly to the lowest finite value of the target type.
struct lowest
{
    template <class T>
    __device__ __host__ operator T() const
    {
        return device_cast(std::numeric_limits<host_type<T>>::lowest());
    }
};
// Converts implicitly to the largest finite value of the target type.
struct highest
{
    template <class T>
    __device__ __host__ operator T() const
    {
        return device_cast(std::numeric_limits<host_type<T>>::max());
    }
};
#ifdef MIGRAPHX_NO_DPP
template <index_int N,
class Op,
......
#ifndef MIGRAPHX_GUARD_DEVICE_REDUCE_OPS_HPP
#define MIGRAPHX_GUARD_DEVICE_REDUCE_OPS_HPP
#include <migraphx/gpu/device/types.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
// Binary reduction functor: adds two values.
struct sum
{
    template <class A, class B>
    MIGRAPHX_DEVICE_CONSTEXPR auto operator()(A lhs, B rhs) const
    {
        return lhs + rhs;
    }
};
// Binary reduction functor: multiplies two values.
struct product
{
    template <class A, class B>
    MIGRAPHX_DEVICE_CONSTEXPR auto operator()(A lhs, B rhs) const
    {
        return lhs * rhs;
    }
};
// Unary functor: returns its argument unchanged (identity post-op).
struct id
{
    template <class V>
    MIGRAPHX_DEVICE_CONSTEXPR auto operator()(V value) const
    {
        return value;
    }
};
// Unary functor: divides by the element count, turning a sum into a mean.
struct mean
{
    size_t item_num = 1; // number of elements that were reduced
    template <class V>
    MIGRAPHX_DEVICE_CONSTEXPR auto operator()(V value) const
    {
        return value / static_cast<V>(item_num);
    }
};
// Binary reduction functor: yields the larger of the two values.
struct max
{
    template <class A, class B>
    MIGRAPHX_DEVICE_CONSTEXPR auto operator()(A lhs, B rhs) const
    {
        return (lhs > rhs) ? lhs : rhs;
    }
};
// Binary reduction functor: yields the smaller of the two values.
struct min
{
    template <class A, class B>
    MIGRAPHX_DEVICE_CONSTEXPR auto operator()(A lhs, B rhs) const
    {
        return (lhs < rhs) ? lhs : rhs;
    }
};
// Converts implicitly to the lowest finite value representable by the
// requested type (the identity element for a max reduction).
struct lowest
{
    template <class V>
    __device__ __host__ operator V() const
    {
        return device_cast(std::numeric_limits<host_type<V>>::lowest());
    }
};
// Converts implicitly to the largest finite value representable by the
// requested type (the identity element for a min reduction).
struct highest
{
    template <class V>
    __device__ __host__ operator V() const
    {
        return device_cast(std::numeric_limits<host_type<V>>::max());
    }
};
} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif // MIGRAPHX_GUARD_DEVICE_REDUCE_OPS_HPP
#ifndef MIGRAPHX_GUARD_DEVICE_SCAN_HPP
#define MIGRAPHX_GUARD_DEVICE_SCAN_HPP
#include <migraphx/gpu/device/launch.hpp>
#include <migraphx/gpu/device/visit.hpp>
#include <migraphx/gpu/device/multi_index.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
// Block-wide inclusive scan over the strided index range described by `fs`.
//
// Each pass over the stride handles up to idx.nlocal() elements: lane 0 seeds
// its element with the carry `x` from the previous pass, the lanes then
// combine partial results in shared memory with doubling offsets, and the
// carry is refreshed from the last slot so successive passes chain together.
// `buffer` is indexed by the local thread id, so N must cover idx.nlocal().
template <index_int N,
          class Op,
          class T,
          class ForStride,
          class Input,
          class Output,
          MIGRAPHX_REQUIRES(not std::is_integral<ForStride>{})>
__device__ void block_scan(index idx, Op op, T init, ForStride fs, Input input, Output output)
{
    using type = decltype(input(deduce_for_stride(fs)));
    // Shared staging buffer for the partial results of this block.
    MIGRAPHX_DEVICE_SHARED type buffer[N];
    // Carry accumulated from the previous pass over the stride.
    type x = init;
    fs([&](auto i) {
        // Lane 0 folds the carry into its element; the other lanes just load.
        if(idx.local == 0)
            buffer[idx.local] = op(input(i), x);
        else
            buffer[idx.local] = input(i);
        __syncthreads();
        for(index_int s = 1; s < idx.nlocal(); s *= 2)
        {
            // NOTE(review): within one iteration, lane t writes slot t+s while
            // lane t+s reads that same slot, with only the trailing
            // __syncthreads() separating them — confirm this access pattern is
            // intentionally race-free for the ops used here.
            if(idx.local + s < idx.nlocal())
            {
                buffer[idx.local + s] = op(buffer[idx.local], buffer[idx.local + s]);
            }
            __syncthreads();
        }
        // The last slot now holds the running total; carry it into the next pass.
        x = buffer[idx.nlocal() - 1];
        output(i, buffer[idx.local]);
    });
}
// Convenience overload: scan the first n elements using the block's
// local-stride iteration, forwarding to the ForStride-based block_scan.
template <index_int N, class Op, class T, class Input, class Output>
__device__ void block_scan(index idx, Op op, T init, index_int n, Input input, Output output)
{
    block_scan<N>(idx,
                  op,
                  init,
                  // Wrap local_stride so the callee can deduce the element type.
                  [&](auto f) -> decltype(f(index_int{})) { return idx.local_stride(n, f); },
                  input,
                  output);
}
} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif // MIGRAPHX_GUARD_DEVICE_SCAN_HPP
#include <migraphx/gpu/device/prefix_scan_sum.hpp>
#include <migraphx/gpu/device/scan.hpp>
#include <migraphx/gpu/device/reduce_ops.hpp>
#include <migraphx/gpu/device/types.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
// Inclusive prefix sum of `arg` along `axis`, written into `result`.
//
// Launches one 256-thread block per slice: the reduced shape (scan axis
// collapsed to 1) enumerates the independent slices, and each block scans
// the n elements of its slice with block_scan using the sum functor.
void prefix_scan_sum(hipStream_t stream, const argument& result, const argument& arg, int32_t axis)
{
    const index_int block_size = 256;
    // Number of elements to scan along the requested axis.
    const index_int n = arg.get_shape().lens()[axis];
    // Collapse the scan axis to 1 so rshape indexes the slices.
    auto rlens = result.get_shape().lens();
    rlens[axis] = 1;
    hip_visit_all(result, arg, result.get_shape().with_lens(rlens))(
        [=](auto output, auto input, auto rshape) {
            // One block per slice; block i/block_size owns slice rshape.multi(...).
            gs_launch(stream, rshape.elements() * block_size, block_size)(
                [=](auto i, auto idx) __device__ {
                    const auto ridx = rshape.multi(i / block_size);
                    // Map position j along the scan axis to a full multi-index.
                    auto compute_idx = [&](auto j) {
                        auto k = ridx;
                        k[axis] = j;
                        return k;
                    };
                    block_scan<block_size>(idx,
                                           sum{},
                                           0,
                                           n,
                                           [&](auto j) { return input[compute_idx(j)]; },
                                           [&](auto j, auto x) { output[compute_idx(j)] = x; });
                });
        });
}
} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
......@@ -14,6 +14,7 @@ struct gpu_allocation_model
std::string name() const;
std::string copy() const;
operation allocate(const shape& s) const;
operation preallocate(const shape& s, const std::string& id) const;
};
} // namespace gpu
......
#ifndef MIGRAPHX_GUARD_DEVICE_PREFIX_SCAN_SUM_HPP
#define MIGRAPHX_GUARD_DEVICE_PREFIX_SCAN_SUM_HPP
#include <migraphx/argument.hpp>
#include <migraphx/config.hpp>
#include <hip/hip_runtime_api.h>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
void prefix_scan_sum(hipStream_t stream, const argument& result, const argument& arg, int32_t axis);
} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif // MIGRAPHX_GUARD_DEVICE_PREFIX_SCAN_SUM_HPP
#ifndef MIGRAPHX_GUARD_GPU_PREFIX_SCAN_SUM_HPP
#define MIGRAPHX_GUARD_GPU_PREFIX_SCAN_SUM_HPP
#include <migraphx/gpu/name.hpp>
#include <migraphx/gpu/hip.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/gpu/device/prefix_scan_sum.hpp>
#include <migraphx/op/prefix_scan_sum.hpp>
#include <migraphx/reflect.hpp>
#include <migraphx/shape.hpp>
#include <migraphx/argument.hpp>
#include <migraphx/config.hpp>
#include <migraphx/type_name.hpp>
#include <utility>
#include <iostream>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
struct context;
// GPU wrapper for the prefix_scan_sum operation: validates shapes and
// forwards to the device kernel. The trailing input is the preallocated
// output buffer, which compute() writes into and aliases as the result.
struct hip_prefix_scan_sum : oper<hip_prefix_scan_sum>
{
    op::prefix_scan_sum op; // reference op carrying axis/exclusive/reverse

    template <class Self, class T>
    static auto reflect(Self& self, T f)
    {
        return migraphx::reflect(self.op, f);
    }
    // Validate the data inputs only; the final (allocation) shape is dropped
    // before delegating to the reference op's shape computation.
    shape compute_shape(const std::vector<shape>& inputs) const
    {
        std::vector<shape> in_shapes{inputs};
        in_shapes.pop_back();
        check_shapes{in_shapes, *this}.standard();
        return op.normalize_compute_shape(in_shapes);
    }
    argument compute(context& ctx, const shape&, const std::vector<argument>& args) const
    {
        // Only the plain inclusive forward scan is implemented on the device.
        if(op.exclusive or op.reverse)
            MIGRAPHX_THROW("Exclusive and reverse scan not supported");
        device::prefix_scan_sum(ctx.get_stream().get(), args[1], args[0], op.axis);
        return args[1];
    }
    // The result aliases the last input (the output buffer).
    std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
    {
        return shapes.size() - 1;
    }
};
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif // MIGRAPHX_GUARD_GPU_PREFIX_SCAN_SUM_HPP
......@@ -163,6 +163,7 @@ struct miopen_apply
add_extend_op("lrn");
add_extend_op("pad");
add_extend_op("pooling");
add_extend_op("prefix_scan_sum");
add_extend_op("reduce_max");
add_extend_op("reduce_mean");
add_extend_op("reduce_min");
......
......@@ -14,6 +14,7 @@
#include <migraphx/insert_pad.hpp>
#include <migraphx/memory_coloring.hpp>
#include <migraphx/normalize_ops.hpp>
#include <migraphx/preallocate_param.hpp>
#include <migraphx/propagate_constant.hpp>
#include <migraphx/register_target.hpp>
#include <migraphx/remap.hpp>
......@@ -31,7 +32,6 @@
#include <migraphx/gpu/lowering.hpp>
#include <migraphx/gpu/mlir_conv.hpp>
#include <migraphx/gpu/pack_int8_args.hpp>
#include <migraphx/gpu/preallocate_param.hpp>
#include <migraphx/gpu/schedule_model.hpp>
#include <migraphx/gpu/sync_device.hpp>
#include <migraphx/gpu/target.hpp>
......@@ -98,7 +98,7 @@ std::vector<pass> target::get_passes(migraphx::context& gctx, const compile_opti
schedule{gpu::schedule_model{ctx.get_current_device().nstreams()}, not enabled(MIGRAPHX_DISABLE_SCHEDULE_PASS{})},
memory_coloring{"hip::allocate"},
sync_device{},
preallocate_param{"scratch", &ctx},
preallocate_param{"scratch", gpu_allocation_model{}},
dead_code_elimination{},
eliminate_workspace{},
eliminate_allocation{"hip::allocate"},
......
......@@ -109,7 +109,7 @@ std::pair<migraphx::program, std::vector<migraphx::argument>> run_verify::run_ta
std::transform(
tres.begin(), tres.end(), res.begin(), [&](auto& argu) { return t.copy_from(argu); });
return std::make_pair(p, res);
return std::make_pair(std::move(p), res);
}
template <class T>
......
#include "verify_program.hpp"
#include <migraphx/program.hpp>
#include <migraphx/generate.hpp>
#include <migraphx/make_op.hpp>
// Verifies prefix_scan_sum along axis 1 on a small 3x3 float input.
struct test_prefix_scan_sum_2d_small : verify_program<test_prefix_scan_sum_2d_small>
{
    migraphx::program create_program() const
    {
        migraphx::program prog;
        auto* mod = prog.get_main_module();
        migraphx::shape param_shape{migraphx::shape::float_type, {3, 3}};
        auto input = mod->add_parameter("x", param_shape);
        mod->add_instruction(
            migraphx::make_op("prefix_scan_sum", {{"axis", 1}, {"exclusive", false}}), input);
        return prog;
    }
};
// Verifies prefix_scan_sum along axis 1 on a larger 3x1000 float input,
// exercising the multi-pass path of the block scan.
struct test_prefix_scan_sum_2d_large : verify_program<test_prefix_scan_sum_2d_large>
{
    migraphx::program create_program() const
    {
        migraphx::program prog;
        auto* mod = prog.get_main_module();
        migraphx::shape param_shape{migraphx::shape::float_type, {3, 1000}};
        auto input = mod->add_parameter("x", param_shape);
        mod->add_instruction(
            migraphx::make_op("prefix_scan_sum", {{"axis", 1}, {"exclusive", false}}), input);
        return prog;
    }
};
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment