Commit cff1144d authored by Paul

Add add_relu device function

parent 48bf509a
@@ -11,6 +11,7 @@ if(NOT TARGET MIOpen)
 endif()
 
 add_library(migraph_device
+    device/add_relu.cpp
     device/contiguous.cpp
 )
 rocm_clang_tidy_check(migraph_device)
device/add_relu.cpp
#include <migraph/gpu/device/add_relu.hpp>
#include <migraph/gpu/device/binary.hpp>

namespace migraph {
namespace gpu {
namespace device {

void add_relu(argument arg1, argument arg2, argument result)
{
    // Fused elementwise add followed by ReLU: out = max(0, x + y).
    // The zero is kept in the sum's own type so the comparison is valid
    // for both integral and floating point element types.
    binary_standard(arg1, arg2, result, [](auto x, auto y) {
        auto sum = x + y;
        return sum > 0 ? sum : decltype(sum){0};
    });
}

} // namespace device
} // namespace gpu
} // namespace migraph
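For reference, the per-element semantics of the fused kernel in plain host C++ (a minimal sketch; add_relu_ref is an illustrative name, not part of migraph):

#include <algorithm>
#include <cassert>
#include <cstddef>
#include <vector>

// Reference semantics of the fused kernel: out[i] = max(0, x[i] + y[i]).
std::vector<float> add_relu_ref(const std::vector<float>& x, const std::vector<float>& y)
{
    assert(x.size() == y.size());
    std::vector<float> out(x.size());
    for(std::size_t i = 0; i < x.size(); i++)
        out[i] = std::max(0.0f, x[i] + y[i]);
    return out;
}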
device/contiguous.cpp
 #include <migraph/gpu/device/contiguous.hpp>
 #include <migraph/gpu/device/launch.hpp>
+#include <migraph/gpu/device/unary.hpp>
 
 namespace migraph {
 namespace gpu {
 namespace device {
 
-template <class F>
-void visit_tensor_size(std::size_t n, F f)
+void contiguous(argument arg, argument result)
 {
-    switch(n)
-    {
-    case 1:
-    {
-        f(std::integral_constant<std::size_t, 1>{});
-        break;
-    }
-    case 2:
-    {
-        f(std::integral_constant<std::size_t, 2>{});
-        break;
-    }
-    case 3:
-    {
-        f(std::integral_constant<std::size_t, 3>{});
-        break;
-    }
-    case 4:
-    {
-        f(std::integral_constant<std::size_t, 4>{});
-        break;
-    }
-    case 5:
-    {
-        f(std::integral_constant<std::size_t, 5>{});
-        break;
-    }
-    default: throw std::runtime_error("Unknown tensor size");
-    }
+    unary_nonstandard(arg, result, [](auto x) { return x; });
 }
-
-template <size_t NDim>
-struct hip_index
-{
-    size_t d[NDim];
-    __device__ __host__ size_t& operator[](size_t i) { return d[i]; }
-    __device__ __host__ size_t operator[](size_t i) const { return d[i]; }
-};
-
-template <size_t NDim>
-struct hip_tensor_descriptor
-{
-    __device__ __host__ hip_tensor_descriptor() = default;
-
-    template <typename T, typename V>
-    __device__ __host__ hip_tensor_descriptor(const T& lens_ext, const V& strides_ext)
-    {
-        for(size_t i = 0; i < NDim; i++)
-            lens[i] = lens_ext[i];
-        for(size_t i = 0; i < NDim; i++)
-            strides[i] = strides_ext[i];
-    }
-
-    __device__ __host__ hip_index<NDim> multi(size_t idx) const
-    {
-        hip_index<NDim> result{};
-        size_t tidx = idx;
-        for(size_t is = 0; is < NDim; is++)
-        {
-            result[is] = tidx / strides[is];
-            tidx       = tidx % strides[is];
-        }
-        return result;
-    }
-
-    __device__ __host__ size_t linear(hip_index<NDim> s) const
-    {
-        size_t idx = 0;
-        for(size_t i = 0; i < NDim; i++)
-            idx += s[i] * strides[i];
-        return idx;
-    }
-
-    size_t lens[NDim]    = {};
-    size_t strides[NDim] = {};
-};
-
-void contiguous(shape output_shape, argument arg, argument result)
-{
-    visit_all(result, arg)([&](auto output, auto input) {
-        visit_tensor_size(output_shape.lens().size(), [&](auto ndim) {
-            const auto& s = arg.get_shape();
-            hip_tensor_descriptor<ndim> a_desc(s.lens(), s.strides());
-            hip_tensor_descriptor<ndim> at_desc(output_shape.lens(), output_shape.strides());
-            auto* a  = input.data();
-            auto* at = output.data();
-            gs_launch(s.elements())([=](auto i) {
-                size_t lidx = a_desc.linear(at_desc.multi(i));
-                at[i]       = a[lidx];
-            });
-        });
-    });
-}
 
 } // namespace device
 } // namespace gpu
 } // namespace migraph
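The code removed above reappears in migraph/gpu/device/tensor.hpp further down; contiguous itself is now just an identity function routed through unary_nonstandard, which performs the strided gather. For intuition, here is a host-only sketch of what that gather does for a 2D view (make_contiguous_2d is an illustrative name, not part of the commit):

#include <cstddef>
#include <vector>

// Copy a strided 2D view into packed row-major storage: for each output
// element, compute its multi-index, then read the input through its strides.
std::vector<float> make_contiguous_2d(const float* src,
                                      std::size_t rows, std::size_t cols,
                                      std::size_t stride0, std::size_t stride1)
{
    std::vector<float> out(rows * cols);
    for(std::size_t r = 0; r < rows; r++)
        for(std::size_t c = 0; c < cols; c++)
            out[r * cols + c] = src[r * stride0 + c * stride1];
    return out;
}

With stride0 = 1 and stride1 = rows, this packs a column-major (transposed) view into row-major storage.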
migraph/gpu/device/binary.hpp
#ifndef MIGRAPH_GUARD_RTGLIB_DEVICE_BINARY_HPP
#define MIGRAPH_GUARD_RTGLIB_DEVICE_BINARY_HPP

#include <cassert>
#include <migraph/argument.hpp>
#include <migraph/gpu/device/tensor.hpp>
#include <migraph/gpu/device/launch.hpp>

namespace migraph {
namespace gpu {
namespace device {

// Elementwise apply for inputs whose layouts may be non-packed or broadcast:
// each output index is mapped through every input's strides.
template <class F>
void binary_nonstandard(argument x, argument y, argument result, F f)
{
    auto output_shape = result.get_shape();
    auto input_shape  = x.get_shape();
    visit_all(result, x, y)([&](auto output, auto input1, auto input2) {
        visit_tensor_size(output_shape.lens().size(), [&](auto ndim) {
            hip_tensor_descriptor<ndim> x_desc(x.get_shape().lens(), x.get_shape().strides());
            hip_tensor_descriptor<ndim> y_desc(y.get_shape().lens(), y.get_shape().strides());
            hip_tensor_descriptor<ndim> out_desc(output_shape.lens(), output_shape.strides());
            auto* xp   = input1.data();
            auto* yp   = input2.data();
            auto* outp = output.data();
            gs_launch(input_shape.elements())([=](auto i) {
                auto outidx = out_desc.multi(i);
                size_t xidx = x_desc.linear(outidx);
                size_t yidx = y_desc.linear(outidx);
                outp[i]     = f(xp[xidx], yp[yidx]);
            });
        });
    });
}

// Fast path for packed, same-layout tensors: a flat 1D loop.
template <class F>
void binary_standard(argument x, argument y, argument result, F f)
{
    assert(x.get_shape().elements() == y.get_shape().elements());
    auto input_shape = x.get_shape();
    visit_all(result, x, y)([&](auto output, auto input1, auto input2) {
        auto* xp   = input1.data();
        auto* yp   = input2.data();
        auto* outp = output.data();
        gs_launch(input_shape.elements())([=](auto i) { outp[i] = f(xp[i], yp[i]); });
    });
}

// Dispatch on layout; defined after the implementations it calls so that
// unqualified name lookup finds them.
template <class F>
void binary(argument x, argument y, argument result, F f)
{
    if(x.get_shape().standard())
        binary_standard(x, y, result, f);
    else
        binary_nonstandard(x, y, result, f);
}

} // namespace device
} // namespace gpu
} // namespace migraph
#endif
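A consequence of the stride arithmetic in binary_nonstandard is that broadcasting needs no special casing: an input dimension with stride 0 re-reads the same element for every output index along that dimension. A host-only sketch of that indexing (illustrative names, assuming a packed 2x3 output):

#include <cstddef>
#include <vector>

// Add a length-3 vector y to every row of a 2x3 matrix x by giving y a
// zero stride along the row dimension, as binary_nonstandard would.
std::vector<float> broadcast_add_2x3(const std::vector<float>& x, const std::vector<float>& y)
{
    const std::size_t x_strides[2] = {3, 1};
    const std::size_t y_strides[2] = {0, 1}; // stride 0 broadcasts y across rows
    std::vector<float> out(6);
    for(std::size_t i = 0; i < out.size(); i++)
    {
        std::size_t r = i / 3, c = i % 3; // out_desc.multi(i) for a packed 2x3 shape
        out[i] = x[r * x_strides[0] + c * x_strides[1]] + y[r * y_strides[0] + c * y_strides[1]];
    }
    return out;
}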
migraph/gpu/device/launch.hpp
@@ -21,7 +21,7 @@ __global__ void launcher(F f)
     f(idx);
 }
 
-auto launch(std::size_t global, std::size_t local)
+inline auto launch(std::size_t global, std::size_t local)
 {
     return [=](auto f) {
         assert(local > 0);
@@ -33,7 +33,7 @@ auto launch(std::size_t global, std::size_t local)
     };
 }
 
-auto gs_launch(std::size_t n, std::size_t local = 512)
+inline auto gs_launch(std::size_t n, std::size_t local = 512)
 {
     std::size_t groups  = 1 + n / local;
     std::size_t nglobal = std::min<std::size_t>(512, groups) * local;
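The new inline keyword is load-bearing here: with add_relu.cpp added to the library, launch.hpp is now included from more than one translation unit, and non-inline function definitions in a header would trigger multiple-definition errors at link time.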
migraph/gpu/device/tensor.hpp
#ifndef MIGRAPH_GUARD_RTGLIB_DEVICE_TENSOR_HPP
#define MIGRAPH_GUARD_RTGLIB_DEVICE_TENSOR_HPP

#include <cstddef>
#include <stdexcept>
#include <type_traits>

#include <hip/hip_runtime.h>

namespace migraph {
namespace gpu {
namespace device {

// Lift a runtime rank into a compile-time constant so kernels can be
// instantiated with fixed-size index arrays. Ranks above 5 are not supported.
template <class F>
void visit_tensor_size(std::size_t n, F f)
{
    switch(n)
    {
    case 1: f(std::integral_constant<std::size_t, 1>{}); break;
    case 2: f(std::integral_constant<std::size_t, 2>{}); break;
    case 3: f(std::integral_constant<std::size_t, 3>{}); break;
    case 4: f(std::integral_constant<std::size_t, 4>{}); break;
    case 5: f(std::integral_constant<std::size_t, 5>{}); break;
    default: throw std::runtime_error("Unknown tensor size");
    }
}

// Fixed-rank multi-dimensional index usable on host and device.
template <size_t NDim>
struct hip_index
{
    size_t d[NDim];
    __device__ __host__ size_t& operator[](size_t i) { return d[i]; }
    __device__ __host__ size_t operator[](size_t i) const { return d[i]; }
};

// Shape lengths and strides in a POD form that can be captured by value
// into device lambdas.
template <size_t NDim>
struct hip_tensor_descriptor
{
    __device__ __host__ hip_tensor_descriptor() = default;

    template <typename T, typename V>
    __device__ __host__ hip_tensor_descriptor(const T& lens_ext, const V& strides_ext)
    {
        for(size_t i = 0; i < NDim; i++)
            lens[i] = lens_ext[i];
        for(size_t i = 0; i < NDim; i++)
            strides[i] = strides_ext[i];
    }

    // Convert a flat element index into a multi-index. This assumes the
    // strides describe a packed row-major layout.
    __device__ __host__ hip_index<NDim> multi(size_t idx) const
    {
        hip_index<NDim> result{};
        size_t tidx = idx;
        for(size_t is = 0; is < NDim; is++)
        {
            result[is] = tidx / strides[is];
            tidx       = tidx % strides[is];
        }
        return result;
    }

    // Convert a multi-index back to a flat offset through the strides.
    __device__ __host__ size_t linear(hip_index<NDim> s) const
    {
        size_t idx = 0;
        for(size_t i = 0; i < NDim; i++)
            idx += s[i] * strides[i];
        return idx;
    }

    size_t lens[NDim]    = {};
    size_t strides[NDim] = {};
};

} // namespace device
} // namespace gpu
} // namespace migraph
#endif
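As a sanity check, multi and linear invert each other on packed shapes. A host-only restatement of the descriptor arithmetic (no HIP needed; the loops below mirror multi and linear for a 2x3 tensor rather than including tensor.hpp):

#include <cassert>
#include <cstddef>

int main()
{
    // Packed row-major 2x3 tensor: strides are {3, 1}.
    const std::size_t strides[2] = {3, 1};
    for(std::size_t i = 0; i < 6; i++)
    {
        // multi(): peel off one coordinate per dimension
        std::size_t tidx = i;
        std::size_t m[2];
        for(std::size_t d = 0; d < 2; d++)
        {
            m[d] = tidx / strides[d];
            tidx = tidx % strides[d];
        }
        // linear(): dot the multi-index with the strides
        std::size_t flat = m[0] * strides[0] + m[1] * strides[1];
        assert(flat == i);
    }
    return 0;
}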
migraph/gpu/device/unary.hpp
#ifndef MIGRAPH_GUARD_RTGLIB_DEVICE_UNARY_HPP
#define MIGRAPH_GUARD_RTGLIB_DEVICE_UNARY_HPP

#include <migraph/argument.hpp>
#include <migraph/gpu/device/tensor.hpp>
#include <migraph/gpu/device/launch.hpp>

namespace migraph {
namespace gpu {
namespace device {

// Elementwise apply for a possibly non-packed input: each packed output
// index is mapped through the input's strides before reading.
template <class F>
void unary_nonstandard(argument x, argument result, F f)
{
    auto output_shape = result.get_shape();
    auto input_shape  = x.get_shape();
    visit_all(result, x)([&](auto output, auto input) {
        visit_tensor_size(output_shape.lens().size(), [&](auto ndim) {
            hip_tensor_descriptor<ndim> x_desc(input_shape.lens(), input_shape.strides());
            hip_tensor_descriptor<ndim> out_desc(output_shape.lens(), output_shape.strides());
            auto* xp   = input.data();
            auto* outp = output.data();
            gs_launch(input_shape.elements())([=](auto i) {
                size_t xidx = x_desc.linear(out_desc.multi(i));
                outp[i]     = f(xp[xidx]);
            });
        });
    });
}

// Fast path for packed inputs: a flat 1D loop.
template <class F>
void unary_standard(argument x, argument result, F f)
{
    auto input_shape = x.get_shape();
    visit_all(result, x)([&](auto output, auto input) {
        auto* xp   = input.data();
        auto* outp = output.data();
        gs_launch(input_shape.elements())([=](auto i) { outp[i] = f(xp[i]); });
    });
}

// Dispatch on layout; defined after the implementations it calls so that
// unqualified name lookup finds them.
template <class F>
void unary(argument x, argument result, F f)
{
    if(x.get_shape().standard())
        unary_standard(x, result, f);
    else
        unary_nonstandard(x, result, f);
}

} // namespace device
} // namespace gpu
} // namespace migraph
#endif
migraph/gpu/device/add_relu.hpp
#ifndef MIGRAPH_GUARD_RTGLIB_DEVICE_ADD_RELU_HPP
#define MIGRAPH_GUARD_RTGLIB_DEVICE_ADD_RELU_HPP

#include <migraph/argument.hpp>

namespace migraph {
namespace gpu {
namespace device {

void add_relu(argument arg1, argument arg2, argument result);

} // namespace device
} // namespace gpu
} // namespace migraph
#endif
migraph/gpu/device/contiguous.hpp
@@ -7,7 +7,7 @@ namespace migraph {
 namespace gpu {
 namespace device {
 
-void contiguous(shape output_shape, argument arg, argument result);
+void contiguous(argument arg, argument result);
 
 } // namespace device
 } // namespace gpu
@@ -252,7 +252,8 @@ struct miopen_contiguous
     {
         assert(output_shape == args[1].get_shape());
         assert(output_shape.standard());
-        device::contiguous(std::move(output_shape), args.at(0), args.at(1));
+        (void)output_shape;
+        device::contiguous(args.at(0), args.at(1));
         return args.at(1);
     }
 };
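The added (void)output_shape keeps release builds warning-free: under NDEBUG both asserts compile away, and output_shape would otherwise be unused now that it is no longer forwarded to device::contiguous.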