Commit 3a848f0d authored by Paul
Browse files

Merge branch 'develop' into doc2

parents 64e8e30a d1e945da
......@@ -9,7 +9,7 @@ namespace device {
// Hands `stream`, `result` and `arg` to `nary` together with a lambda that
// maps a value to 1 when it is positive, -1 when it is negative and 0 otherwise.
// Presumably `nary` applies the lambda element-wise to `arg` and stores into
// `result` — confirm against the definition of nary.
void sign(hipStream_t stream, const argument& result, const argument& arg)
{
// NOTE(review): this text comes from a diff hunk, so the next two lines are the
// before/after versions of one statement; they differ only in `__device__`.
nary(stream, result, arg)([](auto x) { return (x > 0 ? 1 : ((x < 0) ? -1 : 0)); });
nary(stream, result, arg)([](auto x) __device__ { return (x > 0 ? 1 : ((x < 0) ? -1 : 0)); });
}
} // namespace device
......
......@@ -9,7 +9,7 @@ namespace device {
// Hands `stream`, `result` and `arg` to `nary` together with a lambda that
// converts its input with to_hip_type and returns the global ::sin of it.
// Presumably `nary` applies the lambda element-wise to `arg` and stores into
// `result` — confirm against the definition of nary.
void sin(hipStream_t stream, const argument& result, const argument& arg)
{
// NOTE(review): this text comes from a diff hunk, so the next two lines are the
// before/after versions of one statement; they differ only in `__device__`.
nary(stream, result, arg)([](auto x) { return ::sin(to_hip_type(x)); });
nary(stream, result, arg)([](auto x) __device__ { return ::sin(to_hip_type(x)); });
}
} // namespace device
......
......@@ -9,7 +9,7 @@ namespace device {
// Hands `stream`, `result` and `arg` to `nary` together with a lambda that
// converts its input with to_hip_type and returns the global ::sinh of it.
// Presumably `nary` applies the lambda element-wise to `arg` and stores into
// `result` — confirm against the definition of nary.
void sinh(hipStream_t stream, const argument& result, const argument& arg)
{
// NOTE(review): this text comes from a diff hunk, so the next two lines are the
// before/after versions of one statement; they differ only in `__device__`.
nary(stream, result, arg)([](auto x) { return ::sinh(to_hip_type(x)); });
nary(stream, result, arg)([](auto x) __device__ { return ::sinh(to_hip_type(x)); });
}
} // namespace device
......
......@@ -12,11 +12,10 @@ inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
void softmax(hipStream_t stream, const argument& result, const argument& arg, int axis)
void softmax(hipStream_t stream, const argument& result, const argument& arg, int64_t axis)
{
auto lens = result.get_shape().lens();
auto batch_lens = lens;
index_int batch_item_num = lens[axis];
auto batch_lens = result.get_shape().lens();
index_int batch_item_num = batch_lens[axis];
batch_lens[axis] = 1;
migraphx::shape batch_shape{result.get_shape().type(), batch_lens};
......@@ -43,7 +42,7 @@ void softmax(hipStream_t stream, const argument& result, const argument& arg, in
return ::exp(to_hip_type(val));
});
idx.local_stride(batch_item_num, [&](auto j) {
idx.local_stride(batch_item_num, [&](auto j) __device__ {
data_idx[axis] = j;
auto val = input[data_idx] - batch_max;
output[data_idx] = ::exp(to_hip_type(val)) / batch_sum;
......
......@@ -8,7 +8,7 @@ namespace device {
// Hands `stream`, `result`, `arg1` and `arg2` to `nary` together with a
// two-argument lambda returning the squared difference (x - y) * (x - y).
// Presumably `nary` pairs up the elements of `arg1` and `arg2` and stores into
// `result` — confirm against the definition of nary.
void sqdiff(hipStream_t stream, const argument& result, const argument& arg1, const argument& arg2)
{
// NOTE(review): this text comes from a diff hunk, so the next two lines are the
// before/after versions of one statement; they differ only in `__device__`.
nary(stream, result, arg1, arg2)([](auto x, auto y) { return (x - y) * (x - y); });
nary(stream, result, arg1, arg2)([](auto x, auto y) __device__ { return (x - y) * (x - y); });
}
} // namespace device
......
......@@ -9,7 +9,7 @@ namespace device {
// Hands `stream`, `result` and `arg` to `nary` together with a lambda that
// converts its input with to_hip_type and returns the global ::sqrt of it.
// Presumably `nary` applies the lambda element-wise to `arg` and stores into
// `result` — confirm against the definition of nary.
void sqrt(hipStream_t stream, const argument& result, const argument& arg)
{
// NOTE(review): this text comes from a diff hunk, so the next two lines are the
// before/after versions of one statement; they differ only in `__device__`.
nary(stream, result, arg)([](auto x) { return ::sqrt(to_hip_type(x)); });
nary(stream, result, arg)([](auto x) __device__ { return ::sqrt(to_hip_type(x)); });
}
} // namespace device
......
......@@ -8,7 +8,7 @@ namespace device {
// Hands `stream`, `result`, `arg1` and `arg2` to `nary` together with a
// two-argument lambda returning x - y.
// Presumably `nary` pairs up the elements of `arg1` and `arg2` and stores into
// `result` — confirm against the definition of nary.
void sub(hipStream_t stream, const argument& result, const argument& arg1, const argument& arg2)
{
// NOTE(review): this text comes from a diff hunk, so the next two lines are the
// before/after versions of one statement; they differ only in `__device__`.
nary(stream, result, arg1, arg2)([](auto x, auto y) { return x - y; });
nary(stream, result, arg1, arg2)([](auto x, auto y) __device__ { return x - y; });
}
} // namespace device
......
......@@ -9,7 +9,7 @@ namespace device {
// Hands `stream`, `result` and `arg` to `nary` together with a lambda that
// converts its input with to_hip_type and returns the global ::tan of it.
// Presumably `nary` applies the lambda element-wise to `arg` and stores into
// `result` — confirm against the definition of nary.
void tan(hipStream_t stream, const argument& result, const argument& arg)
{
// NOTE(review): this text comes from a diff hunk, so the next two lines are the
// before/after versions of one statement; they differ only in `__device__`.
nary(stream, result, arg)([](auto x) { return ::tan(to_hip_type(x)); });
nary(stream, result, arg)([](auto x) __device__ { return ::tan(to_hip_type(x)); });
}
} // namespace device
......
......@@ -9,7 +9,7 @@ namespace device {
// Hands `stream`, `result` and `arg` to `nary` together with a lambda that
// converts its input with to_hip_type and returns the global ::tanh of it.
// Presumably `nary` applies the lambda element-wise to `arg` and stores into
// `result` — confirm against the definition of nary.
void tanh(hipStream_t stream, const argument& result, const argument& arg)
{
// NOTE(review): this text comes from a diff hunk, so the next two lines are the
// before/after versions of one statement; they differ only in `__device__`.
nary(stream, result, arg)([](auto x) { return ::tanh(to_hip_type(x)); });
nary(stream, result, arg)([](auto x) __device__ { return ::tanh(to_hip_type(x)); });
}
} // namespace device
......
......@@ -148,6 +148,12 @@ MIGRAPHX_PRED_MATCHER(fusable_conv, instruction_ref ins)
return false;
if(wei.lens()[1] > 512 and conv.algo != miopenConvolutionFwdAlgoWinograd)
return false;
// Do not fuse non-symmetric input
auto input_lens = ins->inputs().at(0)->get_shape().lens();
if(input_lens[2] != input_lens[3] or wei.lens()[2] != wei.lens()[3])
return false;
auto op = conv.op;
// Don't fuse winograd for non-3x3s since there is no fused winograd for those configs
if(conv.algo == miopenConvolutionFwdAlgoWinograd and wei.lens()[2] != 3 and
......
#ifndef MIGRAPHX_GUARD_RTGLIB_ACOSH_HPP
#define MIGRAPHX_GUARD_RTGLIB_ACOSH_HPP
#include <migraphx/gpu/oper.hpp>
#include <migraphx/gpu/device/acosh.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
// GPU acosh operator. The struct adds no members of its own: everything comes
// from unary_device, instantiated with this type and the device::acosh function.
struct hip_acosh : public unary_device<hip_acosh, device::acosh>
{
};
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
#ifndef MIGRAPHX_GUARD_RTGLIB_ASINH_HPP
#define MIGRAPHX_GUARD_RTGLIB_ASINH_HPP
#include <migraphx/gpu/oper.hpp>
#include <migraphx/gpu/device/asinh.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
// GPU asinh operator. The struct adds no members of its own: everything comes
// from unary_device, instantiated with this type and the device::asinh function.
struct hip_asinh : public unary_device<hip_asinh, device::asinh>
{
};
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
#ifndef MIGRAPHX_GUARD_RTGLIB_ATANH_HPP
#define MIGRAPHX_GUARD_RTGLIB_ATANH_HPP
#include <migraphx/gpu/oper.hpp>
#include <migraphx/gpu/device/atanh.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
// GPU atanh operator. The struct adds no members of its own: everything comes
// from unary_device, instantiated with this type and the device::atanh function.
struct hip_atanh : public unary_device<hip_atanh, device::atanh>
{
};
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
#ifndef MIGRAPHX_GUARD_RTGLIB_DECONVOLUTION_HPP
#define MIGRAPHX_GUARD_RTGLIB_DECONVOLUTION_HPP
#include <migraphx/shape.hpp>
#include <migraphx/op/deconvolution.hpp>
#include <migraphx/gpu/miopen.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
struct context;
// GPU operator wrapping op::deconvolution together with MIOpen state.
// Only declarations are visible in this header: compute_shape, compute, compile
// and finalize are defined elsewhere.
struct miopen_deconvolution
{
    // The wrapped operator; its fields are what reflect() exposes.
    op::deconvolution op;
    // Shared convolution descriptor; presumably built from `op` during
    // compile/finalize — confirm in the implementation file.
    shared<convolution_descriptor> cd;
    // Value-initialised algorithm selection; not part of reflection yet (see TODO).
    miopenConvFwdAlgorithm_t algo{};
    // MIOpen handle, null until something assigns it.
    miopenHandle_t handle = nullptr;

    // Reflects the fields of the wrapped operator. Delegates to the reflect of
    // the member's own type (op::deconvolution), so the visited field list always
    // matches `op` and this header needs nothing beyond op/deconvolution.hpp.
    template <class Self, class F>
    static auto reflect(Self& self, F f)
    {
        // TODO: Add algo
        return op::deconvolution::reflect(self.op, f);
    }

    // Operator name under which this struct identifies itself.
    std::string name() const { return "gpu::deconv"; }

    shape compute_shape(const std::vector<shape>& inputs) const;
    argument
    compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const;
    shape compile(context& ctx, const shape& output_shape, std::vector<shape> inputs);
    void finalize(context& ctx, const shape& output_shape, std::vector<shape> inputs);

    // Returns the index of the last entry in `shapes`.
    // NOTE(review): presumably the final argument is the preallocated output
    // buffer that the result aliases — confirm against callers.
    std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
    {
        return shapes.size() - 1;
    }
};
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
#ifndef MIGRAPHX_GUARD_RTGLIB_DEVICE_ACOSH_HPP
#define MIGRAPHX_GUARD_RTGLIB_DEVICE_ACOSH_HPP
#include <migraphx/argument.hpp>
#include <migraphx/config.hpp>
#include <hip/hip_runtime_api.h>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
// Device entry point for acosh; the definition is not part of this header.
// Takes a HIP stream plus the `result` and `arg` arguments.
// Presumably computes the inverse hyperbolic cosine of `arg` element-wise into
// `result` on `stream` — confirm against the implementation.
void acosh(hipStream_t stream, const argument& result, const argument& arg);
} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
......@@ -72,9 +72,8 @@ template <class Op>
void arg_op(Op op, hipStream_t stream, const argument& result, const argument& arg, int64_t axis)
{
auto arg_shape = arg.get_shape();
auto lens = arg_shape.lens();
auto batch_lens = lens;
size_t batch_item_num = lens[axis];
auto batch_lens = arg_shape.lens();
size_t batch_item_num = batch_lens[axis];
batch_lens[axis] = 1;
migraphx::shape batch_shape{arg_shape.type(), batch_lens};
......
#ifndef MIGRAPHX_GUARD_RTGLIB_DEVICE_ASINH_HPP
#define MIGRAPHX_GUARD_RTGLIB_DEVICE_ASINH_HPP
#include <migraphx/argument.hpp>
#include <migraphx/config.hpp>
#include <hip/hip_runtime_api.h>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
// Device entry point for asinh; the definition is not part of this header.
// Takes a HIP stream plus the `result` and `arg` arguments.
// Presumably computes the inverse hyperbolic sine of `arg` element-wise into
// `result` on `stream` — confirm against the implementation.
void asinh(hipStream_t stream, const argument& result, const argument& arg);
} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
#ifndef MIGRAPHX_GUARD_RTGLIB_DEVICE_ATANH_HPP
#define MIGRAPHX_GUARD_RTGLIB_DEVICE_ATANH_HPP
#include <migraphx/argument.hpp>
#include <migraphx/config.hpp>
#include <hip/hip_runtime_api.h>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
// Device entry point for atanh; the definition is not part of this header.
// Takes a HIP stream plus the `result` and `arg` arguments.
// Presumably computes the inverse hyperbolic tangent of `arg` element-wise into
// `result` on `stream` — confirm against the implementation.
void atanh(hipStream_t stream, const argument& result, const argument& arg);
} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
......@@ -10,7 +10,7 @@ inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
// Device entry point for logsoftmax, taking a HIP stream, the `result` and `arg`
// arguments and the axis; the definition is not part of this header.
// NOTE(review): this text comes from a diff hunk, so the two declarations are the
// before/after versions of one line; they differ only in the axis type
// (int vs int64_t).
void logsoftmax(hipStream_t stream, const argument& result, const argument& arg, int axis);
void logsoftmax(hipStream_t stream, const argument& result, const argument& arg, int64_t axis);
} // namespace device
} // namespace gpu
......
#ifndef MIGRAPHX_GUARD_RTGLIB_DEVICE_PRELU_HPP
#define MIGRAPHX_GUARD_RTGLIB_DEVICE_PRELU_HPP
#include <migraphx/argument.hpp>
#include <migraphx/config.hpp>
#include <hip/hip_runtime_api.h>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
// Device entry point for prelu; the definition is not part of this header.
// Takes a HIP stream, the `result` argument and two inputs.
// Presumably `arg1` is the data and `arg2` the slope of a parametric ReLU —
// TODO confirm against the implementation.
void prelu(hipStream_t stream, const argument& result, const argument& arg1, const argument& arg2);
} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment