"backend/vscode:/vscode.git/clone" did not exist on "9bcd4ce5c0a01af68c0d2aa44554a68bb741c61b"
Commit bc5d7f75 authored by Paul's avatar Paul
Browse files

Merge from develop

parents 47c0854d a5b0afa0
#include <migraphx/gpu/device/exp.hpp>
#include <migraphx/gpu/device/nary.hpp>
#include <migraphx/gpu/device/types.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
void exp(hipStream_t stream, const argument& result, const argument& arg)
{
nary(stream, result, arg)([](auto x) { return ::exp(to_hip_type(x)); });
}
} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#include <migraphx/shape.hpp>
#include <migraphx/argument.hpp>
#include <migraphx/gpu/device/gather.hpp>
#include <migraphx/gpu/device/tensor.hpp>
#include <migraphx/gpu/device/launch.hpp>
#include <migraphx/gpu/device/types.hpp>
#include <migraphx/gpu/hip.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
argument gather(hipStream_t stream,
const migraphx::shape& output_shape,
std::vector<migraphx::argument> args,
int axis)
{
int axis_index = (axis < 0) ? (axis + output_shape.lens().size()) : axis;
visit_all(args.back(), args[0])([&](auto output, auto input) {
std::size_t nelements = output_shape.elements();
args[1].visit([&](auto indices) {
visit_tensor_size(output_shape.lens().size(), [&](auto ndim) {
const auto* indices_ptr = device_cast(indices.data());
auto* outptr = device_cast(output.data());
const auto* inptr = device_cast(input.data());
hip_tensor_descriptor<ndim> desc_input(input.get_shape());
hip_tensor_descriptor<ndim> desc_output(output.get_shape());
gs_launch(stream, nelements)([=](auto i) {
auto lens = desc_output.multi(i);
lens[axis_index] = indices_ptr[lens[axis_index]];
outptr[i] = inptr[desc_input.linear(lens)];
});
});
});
});
return args.back();
}
} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#ifndef MIGRAPH_GUARD_RTGLIB_DEVICE_LAUNCH_HPP #ifndef MIGRAPHX_GUARD_RTGLIB_DEVICE_LAUNCH_HPP
#define MIGRAPH_GUARD_RTGLIB_DEVICE_LAUNCH_HPP #define MIGRAPHX_GUARD_RTGLIB_DEVICE_LAUNCH_HPP
#include <hip/hip_runtime.h> #include <hip/hip_runtime.h>
#include <migraph/config.hpp> #include <migraphx/config.hpp>
namespace migraph { namespace migraphx {
inline namespace MIGRAPH_INLINE_NS { inline namespace MIGRAPHX_INLINE_NS {
namespace gpu { namespace gpu {
namespace device { namespace device {
...@@ -53,14 +53,14 @@ inline auto gs_launch(hipStream_t stream, std::size_t n, std::size_t local = 102 ...@@ -53,14 +53,14 @@ inline auto gs_launch(hipStream_t stream, std::size_t n, std::size_t local = 102
// Workaround hcc's broken tile_static macro // Workaround hcc's broken tile_static macro
#ifdef tile_static #ifdef tile_static
#undef tile_static #undef tile_static
#define MIGRAPH_DEVICE_SHARED __attribute__((tile_static)) #define MIGRAPHX_DEVICE_SHARED __attribute__((tile_static))
#else #else
#define MIGRAPH_DEVICE_SHARED __shared__ #define MIGRAPHX_DEVICE_SHARED __shared__
#endif #endif
} // namespace device } // namespace device
} // namespace gpu } // namespace gpu
} // namespace MIGRAPH_INLINE_NS } // namespace MIGRAPHX_INLINE_NS
} // namespace migraph } // namespace migraphx
#endif #endif
#ifndef MIGRAPH_GUARD_RTGLIB_DEVICE_NARY_HPP #ifndef MIGRAPHX_GUARD_RTGLIB_DEVICE_NARY_HPP
#define MIGRAPH_GUARD_RTGLIB_DEVICE_NARY_HPP #define MIGRAPHX_GUARD_RTGLIB_DEVICE_NARY_HPP
#include <migraph/gpu/device/tensor.hpp> #include <migraphx/gpu/device/tensor.hpp>
#include <migraph/gpu/device/launch.hpp> #include <migraphx/gpu/device/launch.hpp>
#include <migraph/gpu/device/types.hpp> #include <migraphx/gpu/device/types.hpp>
#include <migraph/functional.hpp> #include <migraphx/functional.hpp>
#include <migraph/ranges.hpp> #include <migraphx/ranges.hpp>
#include <migraph/config.hpp> #include <migraphx/config.hpp>
namespace migraph { namespace migraphx {
inline namespace MIGRAPH_INLINE_NS { inline namespace MIGRAPHX_INLINE_NS {
namespace gpu { namespace gpu {
namespace device { namespace device {
...@@ -87,7 +87,7 @@ void trinary_broadcast_vec_impl(hipStream_t stream, ...@@ -87,7 +87,7 @@ void trinary_broadcast_vec_impl(hipStream_t stream,
const std::size_t bdim_vec_len = bdim_len / vec_size; const std::size_t bdim_vec_len = bdim_len / vec_size;
launch(stream, nglobal, nlocal)([=](auto idx) __device__ { launch(stream, nglobal, nlocal)([=](auto idx) __device__ {
MIGRAPH_DEVICE_SHARED vec4<type> buffer[2048 / vec_size]; MIGRAPHX_DEVICE_SHARED vec4<type> buffer[2048 / vec_size];
// Load bias into LDS // Load bias into LDS
for(size_t i = idx.local; i < bdim_vec_len; i += nlocal) for(size_t i = idx.local; i < bdim_vec_len; i += nlocal)
{ {
...@@ -144,7 +144,7 @@ void trinary_broadcast_impl(hipStream_t stream, ...@@ -144,7 +144,7 @@ void trinary_broadcast_impl(hipStream_t stream,
const std::size_t n = output.size(); const std::size_t n = output.size();
launch(stream, nglobal, nlocal)([=](auto idx) __device__ { launch(stream, nglobal, nlocal)([=](auto idx) __device__ {
MIGRAPH_DEVICE_SHARED type buffer[2048]; MIGRAPHX_DEVICE_SHARED type buffer[2048];
// Load bias into LDS // Load bias into LDS
for(size_t i = idx.local; i < bdim_len; i += nlocal) for(size_t i = idx.local; i < bdim_len; i += nlocal)
{ {
...@@ -192,7 +192,7 @@ void binary_broadcast_vec_impl( ...@@ -192,7 +192,7 @@ void binary_broadcast_vec_impl(
const std::size_t bdim_vec_len = bdim_len / vec_size; const std::size_t bdim_vec_len = bdim_len / vec_size;
launch(stream, nglobal, nlocal)([=](auto idx) __device__ { launch(stream, nglobal, nlocal)([=](auto idx) __device__ {
MIGRAPH_DEVICE_SHARED vec4<type> buffer[2048 / vec_size]; MIGRAPHX_DEVICE_SHARED vec4<type> buffer[2048 / vec_size];
// Load bias into LDS // Load bias into LDS
for(size_t i = idx.local; i < bdim_vec_len; i += nlocal) for(size_t i = idx.local; i < bdim_vec_len; i += nlocal)
{ {
...@@ -243,7 +243,7 @@ void binary_broadcast_impl( ...@@ -243,7 +243,7 @@ void binary_broadcast_impl(
const std::size_t n = output.size(); const std::size_t n = output.size();
launch(stream, nglobal, nlocal)([=](auto idx) __device__ { launch(stream, nglobal, nlocal)([=](auto idx) __device__ {
MIGRAPH_DEVICE_SHARED type buffer[2048]; MIGRAPHX_DEVICE_SHARED type buffer[2048];
// Load bias into LDS // Load bias into LDS
for(size_t i = idx.local; i < bdim_len; i += nlocal) for(size_t i = idx.local; i < bdim_len; i += nlocal)
{ {
...@@ -313,6 +313,12 @@ void nary_impl(hipStream_t stream, F f, argument result, Arguments... args) ...@@ -313,6 +313,12 @@ void nary_impl(hipStream_t stream, F f, argument result, Arguments... args)
nary_nonstandard_impl(stream, f, result, args...); nary_nonstandard_impl(stream, f, result, args...);
} }
template <class F>
void nary_impl(hipStream_t stream, F f, argument result)
{
nary_standard_impl(stream, f, result);
}
template <class... Arguments> template <class... Arguments>
auto nary_nonstandard(hipStream_t stream, argument result, Arguments... args) auto nary_nonstandard(hipStream_t stream, argument result, Arguments... args)
{ {
...@@ -396,7 +402,7 @@ inline auto nary(hipStream_t stream, ...@@ -396,7 +402,7 @@ inline auto nary(hipStream_t stream,
} // namespace device } // namespace device
} // namespace gpu } // namespace gpu
} // namespace MIGRAPH_INLINE_NS } // namespace MIGRAPHX_INLINE_NS
} // namespace migraph } // namespace migraphx
#endif #endif
#ifndef MIGRAPH_GUARD_RTGLIB_DEAVICE_TENSOR_HPP #ifndef MIGRAPHX_GUARD_RTGLIB_DEAVICE_TENSOR_HPP
#define MIGRAPH_GUARD_RTGLIB_DEAVICE_TENSOR_HPP #define MIGRAPHX_GUARD_RTGLIB_DEAVICE_TENSOR_HPP
#include <hip/hip_runtime.h> #include <hip/hip_runtime.h>
#include <migraph/functional.hpp> #include <migraphx/functional.hpp>
#include <migraph/config.hpp> #include <migraphx/config.hpp>
namespace migraph { namespace migraphx {
inline namespace MIGRAPH_INLINE_NS { inline namespace MIGRAPHX_INLINE_NS {
namespace gpu { namespace gpu {
namespace device { namespace device {
...@@ -87,7 +87,7 @@ struct hip_tensor_descriptor ...@@ -87,7 +87,7 @@ struct hip_tensor_descriptor
} // namespace device } // namespace device
} // namespace gpu } // namespace gpu
} // namespace MIGRAPH_INLINE_NS } // namespace MIGRAPHX_INLINE_NS
} // namespace migraph } // namespace migraphx
#endif #endif
...@@ -5,14 +5,14 @@ ...@@ -5,14 +5,14 @@
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
==============================================================================*/ ==============================================================================*/
#ifndef MIGRAPH_GUARD_RTGLIB_GPU_DEVICE_TYPES_HPP #ifndef MIGRAPHX_GUARD_RTGLIB_GPU_DEVICE_TYPES_HPP
#define MIGRAPH_GUARD_RTGLIB_GPU_DEVICE_TYPES_HPP #define MIGRAPHX_GUARD_RTGLIB_GPU_DEVICE_TYPES_HPP
#include <migraph/half.hpp> #include <migraphx/half.hpp>
#include <migraph/config.hpp> #include <migraphx/config.hpp>
namespace migraph { namespace migraphx {
inline namespace MIGRAPH_INLINE_NS { inline namespace MIGRAPHX_INLINE_NS {
namespace gpu { namespace gpu {
namespace device { namespace device {
...@@ -75,9 +75,18 @@ device_type<T>* device_cast(T* x) ...@@ -75,9 +75,18 @@ device_type<T>* device_cast(T* x)
return reinterpret_cast<device_type<T>*>(x); return reinterpret_cast<device_type<T>*>(x);
} }
template <class T>
T to_hip_type(T x)
{
return x;
}
// Hip doens't support __fp16
inline float to_hip_type(gpu_half x) { return x; }
} // namespace device } // namespace device
} // namespace gpu } // namespace gpu
} // namespace MIGRAPH_INLINE_NS } // namespace MIGRAPHX_INLINE_NS
} // namespace migraph } // namespace migraphx
#endif #endif
#include <migraphx/gpu/device/log.hpp>
#include <migraphx/gpu/device/nary.hpp>
#include <migraphx/gpu/device/types.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
void log(hipStream_t stream, const argument& result, const argument& arg)
{
nary(stream, result, arg)([](auto x) { return ::log(to_hip_type(x)); });
}
} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#include <migraphx/gpu/device/max.hpp>
#include <migraphx/gpu/device/nary.hpp>
#include <migraphx/gpu/device/types.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
void max(hipStream_t stream, const argument& result, const argument& arg1, const argument& arg2)
{
nary(stream, result, arg1, arg2)(
[](auto x, auto y) { return std::max(to_hip_type(x), to_hip_type(y)); });
}
} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#include <migraphx/gpu/device/min.hpp>
#include <migraphx/gpu/device/nary.hpp>
#include <migraphx/gpu/device/types.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
void min(hipStream_t stream, const argument& result, const argument& arg1, const argument& arg2)
{
nary(stream, result, arg1, arg2)(
[](auto x, auto y) { return std::min(to_hip_type(x), to_hip_type(y)); });
}
} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#include <migraph/gpu/device/mul.hpp> #include <migraphx/gpu/device/mul.hpp>
#include <migraph/gpu/device/nary.hpp> #include <migraphx/gpu/device/nary.hpp>
namespace migraph { namespace migraphx {
inline namespace MIGRAPH_INLINE_NS { inline namespace MIGRAPHX_INLINE_NS {
namespace gpu { namespace gpu {
namespace device { namespace device {
...@@ -22,5 +22,5 @@ void mul(hipStream_t stream, ...@@ -22,5 +22,5 @@ void mul(hipStream_t stream,
} // namespace device } // namespace device
} // namespace gpu } // namespace gpu
} // namespace MIGRAPH_INLINE_NS } // namespace MIGRAPHX_INLINE_NS
} // namespace migraph } // namespace migraphx
#include <migraphx/shape.hpp>
#include <migraphx/argument.hpp>
#include <migraphx/gpu/device/nary.hpp>
#include <migraphx/gpu/device/pad.hpp>
#include <migraphx/gpu/device/tensor.hpp>
#include <migraphx/gpu/device/launch.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
argument
pad(hipStream_t stream, argument result, argument arg1, float value, std::vector<std::int64_t> pads)
{
std::size_t nelements = arg1.get_shape().elements();
nary(stream, result)([=] { return value; });
visit_all(result, arg1)([&](auto output, auto input) {
visit_tensor_size(result.get_shape().lens().size(), [&](auto ndim) {
std::size_t offsets[ndim];
std::copy(pads.begin(), pads.begin() + ndim, offsets);
auto* outptr = output.data();
const auto* inptr = input.data();
hip_tensor_descriptor<ndim> desc_input(input.get_shape());
hip_tensor_descriptor<ndim> desc_output(output.get_shape());
gs_launch(stream, nelements)([=](auto i) {
auto idx = desc_input.multi(i);
for(std::size_t j = 0; j < ndim; j++)
{
idx[j] += offsets[j];
}
outptr[desc_output.linear(idx)] = inptr[i];
});
});
});
return result;
}
} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#include <migraphx/gpu/device/sin.hpp>
#include <migraphx/gpu/device/nary.hpp>
#include <migraphx/gpu/device/types.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
void sin(hipStream_t stream, const argument& result, const argument& arg)
{
nary(stream, result, arg)([](auto x) { return ::sin(to_hip_type(x)); });
}
} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#include <migraphx/gpu/device/sinh.hpp>
#include <migraphx/gpu/device/nary.hpp>
#include <migraphx/gpu/device/types.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
void sinh(hipStream_t stream, const argument& result, const argument& arg)
{
nary(stream, result, arg)([](auto x) { return ::sinh(to_hip_type(x)); });
}
} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#include <migraphx/gpu/device/sub.hpp>
#include <migraphx/gpu/device/nary.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
void sub(hipStream_t stream, const argument& result, const argument& arg1, const argument& arg2)
{
nary(stream, result, arg1, arg2)([](auto x, auto y) { return y - x; });
}
} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#include <migraphx/gpu/device/tan.hpp>
#include <migraphx/gpu/device/nary.hpp>
#include <migraphx/gpu/device/types.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
void tan(hipStream_t stream, const argument& result, const argument& arg)
{
nary(stream, result, arg)([](auto x) { return ::tan(to_hip_type(x)); });
}
} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#include <migraph/gpu/eliminate_workspace.hpp> #include <migraphx/gpu/eliminate_workspace.hpp>
#include <migraph/gpu/hip.hpp> #include <migraphx/gpu/hip.hpp>
#include <migraph/program.hpp> #include <migraphx/program.hpp>
#include <migraph/instruction.hpp> #include <migraphx/instruction.hpp>
#include <migraph/operators.hpp> #include <migraphx/operators.hpp>
#include <migraph/iterator_for.hpp> #include <migraphx/iterator_for.hpp>
#include <migraph/ranges.hpp> #include <migraphx/ranges.hpp>
#include <migraph/stringutils.hpp> #include <migraphx/stringutils.hpp>
#include <migraph/pass_config.hpp> #include <migraphx/pass_config.hpp>
namespace migraph { namespace migraphx {
inline namespace MIGRAPH_INLINE_NS { inline namespace MIGRAPHX_INLINE_NS {
namespace gpu { namespace gpu {
void eliminate_workspace::apply(program& p) const void eliminate_workspace::apply(program& p) const
{ {
if(!enabled(MIGRAPH_DISABLE_MEMORY_COLORING{}))
return;
std::size_t n = 0; std::size_t n = 0;
std::vector<instruction_ref> allocs; std::vector<instruction_ref> allocs;
for(auto ins : iterator_for(p)) for(auto ins : iterator_for(p))
...@@ -32,14 +29,17 @@ void eliminate_workspace::apply(program& p) const ...@@ -32,14 +29,17 @@ void eliminate_workspace::apply(program& p) const
allocs.push_back(ins); allocs.push_back(ins);
} }
} }
auto ws = p.add_parameter("workspace", shape{shape::int8_type, {n}}); if(n > 0)
for(auto&& a : allocs)
{ {
p.replace_instruction(a, ws); auto ws = p.add_parameter("workspace", shape{shape::int8_type, {n}});
p.remove_instruction(a); for(auto&& a : allocs)
{
p.replace_instruction(a, ws);
p.remove_instruction(a);
}
} }
} }
} // namespace gpu } // namespace gpu
} // namespace MIGRAPH_INLINE_NS } // namespace MIGRAPHX_INLINE_NS
} // namespace migraph } // namespace migraphx
#include <migraphx/gpu/elu.hpp>
#include <migraphx/operators.hpp>
#include <migraphx/manage_ptr.hpp>
#include <migraphx/gpu/miopen.hpp>
#include <utility>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
shape miopen_elu::compute_shape(const std::vector<shape>& inputs) const
{
check_shapes{inputs, *this}.has(2).not_broadcasted();
return inputs.at(1);
}
argument miopen_elu::compute(context& ctx,
const shape& output_shape,
const std::vector<argument>& args) const
{
float alpha = 1;
float beta = 0;
auto x_desc = make_tensor(args[0].get_shape());
auto y_desc = make_tensor(output_shape);
miopenActivationForward(ctx.get_stream().get_miopen(),
ad.get(),
&alpha,
x_desc.get(),
args[0].implicit(),
&beta,
y_desc.get(),
args[1].implicit());
return args[1];
}
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#include <migraph/gpu/fuse_ops.hpp> #include <migraphx/gpu/fuse_ops.hpp>
#include <migraph/matcher.hpp> #include <migraphx/matcher.hpp>
#include <migraph/gpu/miopen.hpp> #include <migraphx/gpu/miopen.hpp>
#include <migraph/gpu/convolution.hpp> #include <migraphx/gpu/convolution.hpp>
#include <migraph/gpu/device/add_relu.hpp> #include <migraphx/gpu/device/add_relu.hpp>
#include <migraph/instruction.hpp> #include <migraphx/instruction.hpp>
namespace migraph { namespace migraphx {
inline namespace MIGRAPH_INLINE_NS { inline namespace MIGRAPHX_INLINE_NS {
namespace gpu { namespace gpu {
struct fusion struct fusion
...@@ -38,7 +38,7 @@ struct fusion ...@@ -38,7 +38,7 @@ struct fusion
op_t result; op_t result;
auto status = miopenFusionPlanGetOp(fp.get(), i, &result); auto status = miopenFusionPlanGetOp(fp.get(), i, &result);
if(status != miopenStatusSuccess) if(status != miopenStatusSuccess)
MIGRAPH_THROW("Failed retrieving operator at " + std::to_string(i)); MIGRAPHX_THROW("Failed retrieving operator at " + std::to_string(i));
return result; return result;
} }
...@@ -51,7 +51,7 @@ struct fusion ...@@ -51,7 +51,7 @@ struct fusion
auto t = keep_alive(make_tensor(b)); auto t = keep_alive(make_tensor(b));
auto status = miopenCreateOpBiasForward(fp.get(), &result, t.get()); auto status = miopenCreateOpBiasForward(fp.get(), &result, t.get());
if(status != miopenStatusSuccess) if(status != miopenStatusSuccess)
MIGRAPH_THROW("Creating operator failed"); MIGRAPHX_THROW("Creating operator failed");
return result; return result;
} }
...@@ -60,7 +60,7 @@ struct fusion ...@@ -60,7 +60,7 @@ struct fusion
op_t result; op_t result;
auto status = miopenCreateOpActivationForward(fp.get(), &result, miopenActivationRELU); auto status = miopenCreateOpActivationForward(fp.get(), &result, miopenActivationRELU);
if(status != miopenStatusSuccess) if(status != miopenStatusSuccess)
MIGRAPH_THROW("Creating operator failed"); MIGRAPHX_THROW("Creating operator failed");
return result; return result;
} }
...@@ -71,7 +71,7 @@ struct fusion ...@@ -71,7 +71,7 @@ struct fusion
auto t = keep_alive(make_tensor(weights)); auto t = keep_alive(make_tensor(weights));
auto status = miopenCreateOpConvForward(fp.get(), &result, cd.get(), t.get()); auto status = miopenCreateOpConvForward(fp.get(), &result, cd.get(), t.get());
if(status != miopenStatusSuccess) if(status != miopenStatusSuccess)
MIGRAPH_THROW("Creating operator failed"); MIGRAPHX_THROW("Creating operator failed");
return result; return result;
} }
...@@ -91,7 +91,7 @@ struct fusion ...@@ -91,7 +91,7 @@ struct fusion
{ {
auto status = miopenCompileFusionPlan(ctx.get_stream().get_miopen(), fp.get()); auto status = miopenCompileFusionPlan(ctx.get_stream().get_miopen(), fp.get());
if(status != miopenStatusSuccess) if(status != miopenStatusSuccess)
MIGRAPH_THROW("Compiling fusion plan failed"); MIGRAPHX_THROW("Compiling fusion plan failed");
} }
argument execute(context& ctx, argument execute(context& ctx,
...@@ -109,12 +109,12 @@ struct fusion ...@@ -109,12 +109,12 @@ struct fusion
y.implicit(), y.implicit(),
fargs.get()); fargs.get());
if(status != miopenStatusSuccess) if(status != miopenStatusSuccess)
MIGRAPH_THROW("Failed to execute fusion plan"); MIGRAPHX_THROW("Failed to execute fusion plan");
return y; return y;
} }
}; };
MIGRAPH_PRED_MATCHER(bias_shape, instruction_ref ins) MIGRAPHX_PRED_MATCHER(bias_shape, instruction_ref ins)
{ {
auto&& s = ins->get_shape(); auto&& s = ins->get_shape();
return s.broadcasted() and s.strides().size() == 4 and s.strides()[0] == 0 and return s.broadcasted() and s.strides().size() == 4 and s.strides()[0] == 0 and
...@@ -128,7 +128,7 @@ std::array<T, sizeof...(Ts) + 1> make_array(T x, Ts... xs) ...@@ -128,7 +128,7 @@ std::array<T, sizeof...(Ts) + 1> make_array(T x, Ts... xs)
return {std::move(x), std::move(static_cast<T>(xs))...}; return {std::move(x), std::move(static_cast<T>(xs))...};
} }
MIGRAPH_PRED_MATCHER(fusable_conv, instruction_ref ins) MIGRAPHX_PRED_MATCHER(fusable_conv, instruction_ref ins)
{ {
if(ins->name() != "gpu::convolution") if(ins->name() != "gpu::convolution")
return false; return false;
...@@ -137,6 +137,8 @@ MIGRAPH_PRED_MATCHER(fusable_conv, instruction_ref ins) ...@@ -137,6 +137,8 @@ MIGRAPH_PRED_MATCHER(fusable_conv, instruction_ref ins)
auto wei = ins->inputs().at(1)->get_shape(); auto wei = ins->inputs().at(1)->get_shape();
assert(wei.lens().size() == 4); assert(wei.lens().size() == 4);
auto conv = any_cast<miopen_convolution>(ins->get_operator()); auto conv = any_cast<miopen_convolution>(ins->get_operator());
if(conv.op.group > 1)
return false;
if(wei.lens()[1] > 512 and conv.algo != miopenConvolutionFwdAlgoWinograd) if(wei.lens()[1] > 512 and conv.algo != miopenConvolutionFwdAlgoWinograd)
return false; return false;
auto op = conv.op; auto op = conv.op;
...@@ -265,17 +267,15 @@ struct miopen_conv_bias ...@@ -265,17 +267,15 @@ struct miopen_conv_bias
argument compute(context& ctx, const shape&, const std::vector<argument>& args) const argument compute(context& ctx, const shape&, const std::vector<argument>& args) const
{ {
auto fargs = make_fused_args(); auto fargs = make_fused_args();
float alpha = 1, beta = 0; float alpha = 1;
float beta = 0;
miopenSetOpArgsConvForward(fargs.get(), conv, &alpha, &beta, args[1].implicit()); miopenSetOpArgsConvForward(fargs.get(), conv, &alpha, &beta, args[1].implicit());
miopenSetOpArgsBiasForward(fargs.get(), bias, &alpha, &beta, args[3].implicit()); miopenSetOpArgsBiasForward(fargs.get(), bias, &alpha, &beta, args[3].implicit());
return f.execute(ctx, fargs, args[0], args[4]); return f.execute(ctx, fargs, args[0], args[4]);
} }
shape compile(context& ctx) void finalize(context& ctx, const shape&, const std::vector<shape>&) { f.compile(ctx); }
{ shape get_workspace(context& ctx) { return f.get_workspace(ctx); }
f.compile(ctx);
return f.get_workspace(ctx);
}
int output_alias(const std::vector<shape>& shapes) const { return shapes.size() - 1; } int output_alias(const std::vector<shape>& shapes) const { return shapes.size() - 1; }
}; };
...@@ -308,18 +308,15 @@ struct miopen_conv_bias_relu ...@@ -308,18 +308,15 @@ struct miopen_conv_bias_relu
argument compute(context& ctx, const shape&, const std::vector<argument>& args) const argument compute(context& ctx, const shape&, const std::vector<argument>& args) const
{ {
auto fargs = make_fused_args(); auto fargs = make_fused_args();
float alpha = 1, beta = 0; float alpha = 1;
float beta = 0;
miopenSetOpArgsConvForward(fargs.get(), conv, &alpha, &beta, args[1].implicit()); miopenSetOpArgsConvForward(fargs.get(), conv, &alpha, &beta, args[1].implicit());
miopenSetOpArgsBiasForward(fargs.get(), bias, &alpha, &beta, args[3].implicit()); miopenSetOpArgsBiasForward(fargs.get(), bias, &alpha, &beta, args[3].implicit());
miopenSetOpArgsActivForward(fargs.get(), relu, &alpha, &beta, 0, 0, 0); miopenSetOpArgsActivForward(fargs.get(), relu, &alpha, &beta, 0, 0, 0);
return f.execute(ctx, fargs, args[0], args[4]); return f.execute(ctx, fargs, args[0], args[4]);
} }
void finalize(context& ctx, const shape&, const std::vector<shape>&) { f.compile(ctx); }
shape compile(context& ctx) shape get_workspace(context& ctx) { return f.get_workspace(ctx); }
{
f.compile(ctx);
return f.get_workspace(ctx);
}
int output_alias(const std::vector<shape>& shapes) const { return shapes.size() - 1; } int output_alias(const std::vector<shape>& shapes) const { return shapes.size() - 1; }
}; };
...@@ -346,8 +343,8 @@ void apply_conv_bias(context& ctx, program& p, match::matcher_result r) ...@@ -346,8 +343,8 @@ void apply_conv_bias(context& ctx, program& p, match::matcher_result r)
Op cb{conv_op, input_ins->get_shape(), weights_ins->get_shape(), bias_ins->get_shape()}; Op cb{conv_op, input_ins->get_shape(), weights_ins->get_shape(), bias_ins->get_shape()};
// TODO: Insert ws allocation // TODO: Insert ws allocation
auto ws = cb.compile(ctx); auto ws = cb.get_workspace(ctx);
(void)ws;
p.replace_instruction(ins, cb, input_ins, weights_ins, old_ws_ins, bias_ins, alloc_ins); p.replace_instruction(ins, cb, input_ins, weights_ins, old_ws_ins, bias_ins, alloc_ins);
} }
...@@ -389,5 +386,5 @@ void fuse_ops::apply(program& p) const ...@@ -389,5 +386,5 @@ void fuse_ops::apply(program& p) const
} }
} // namespace gpu } // namespace gpu
} // namespace MIGRAPH_INLINE_NS } // namespace MIGRAPHX_INLINE_NS
} // namespace migraph } // namespace migraphx
#include <migraphx/gpu/gather.hpp>
#include <migraphx/operators.hpp>
#include <migraphx/manage_ptr.hpp>
#include <migraphx/gpu/miopen.hpp>
#include <migraphx/gpu/device/concat.hpp>
#include <utility>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
shape hip_gather::compute_shape(std::vector<shape> inputs) const
{
inputs.pop_back();
return op.compute_shape(inputs);
}
argument hip_gather::compute(context& ctx,
const shape& output_shape,
const std::vector<argument>& args) const
{
return device::gather(ctx.get_stream().get(), output_shape, args, op.axis);
}
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#include <migraph/gpu/gemm.hpp> #include <migraphx/gpu/gemm.hpp>
#include <migraph/operators.hpp> #include <migraphx/operators.hpp>
#include <migraph/manage_ptr.hpp> #include <migraphx/manage_ptr.hpp>
#include <migraph/gpu/miopen.hpp> #include <migraphx/gpu/miopen.hpp>
#include <utility> #include <utility>
namespace migraph { namespace migraphx {
inline namespace MIGRAPH_INLINE_NS { inline namespace MIGRAPHX_INLINE_NS {
namespace gpu { namespace gpu {
template <class... Ts> template <class... Ts>
...@@ -29,7 +29,7 @@ void generic_rocblas_gemm(shape::as<half>, Ts&&... xs) ...@@ -29,7 +29,7 @@ void generic_rocblas_gemm(shape::as<half>, Ts&&... xs)
template <class T, class... Ts> template <class T, class... Ts>
void generic_rocblas_gemm(shape::as<T>, Ts&&...) void generic_rocblas_gemm(shape::as<T>, Ts&&...)
{ {
MIGRAPH_THROW("Type unsupported by rocblas"); MIGRAPHX_THROW("Type unsupported by rocblas");
} }
template <class T> template <class T>
...@@ -107,9 +107,10 @@ argument miopen_gemm::compute(context& ctx, ...@@ -107,9 +107,10 @@ argument miopen_gemm::compute(context& ctx,
ldc); ldc);
}); });
return args[2]; return args[2];
} }
} // namespace gpu } // namespace gpu
} // namespace MIGRAPH_INLINE_NS } // namespace MIGRAPHX_INLINE_NS
} // namespace migraph } // namespace migraphx
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment