Commit e85e0fa1 authored by wsttiger

Initial checkin for adding contiguous operator using HIP

parent 8addb9d5
@@ -11,6 +11,30 @@ namespace miopen {
using hip_ptr = MIGRAPH_MANAGE_PTR(void, hipFree);
template <int NDIM>
struct HIPTensorDescriptor
{
    size_t lens[NDIM];
    size_t strides[NDIM];
};
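For orientation, here is how such a descriptor might be populated for a transposed view. The values below are illustrative only and do not appear in this commit: permuting an NCHW tensor of lens {2, 3, 4, 5} into lens {2, 4, 5, 3} keeps the original memory strides, so lens and strides no longer follow the packed row-major pattern.

// Hypothetical example: descriptor for a transposed 4-D view with lens {2, 4, 5, 3}.
// The strides are those of the original {2, 3, 4, 5} buffer, i.e. {60, 20, 5, 1},
// permuted into the new dimension order.
HIPTensorDescriptor<4> td;
td.lens[0] = 2;  td.strides[0] = 60;
td.lens[1] = 4;  td.strides[1] = 5;
td.lens[2] = 5;  td.strides[2] = 1;
td.lens[3] = 3;  td.strides[3] = 20;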
// Kernel: copy a strided source tensor A into a densely packed destination At
// using a grid-stride loop over the output elements.
template <typename T, int NDIM>
__global__ void contiguous_gpu(const T* A,
                               HIPTensorDescriptor<NDIM> td_a,
                               T* At,
                               HIPTensorDescriptor<NDIM> td_at,
                               size_t nelements)
{
    for(size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < nelements;
        i += blockDim.x * gridDim.x)
    {
        // Decompose the linear output index into NDIM coordinates using the
        // packed output strides, then re-linearize with the input strides.
        size_t s[NDIM];
        multiindex<NDIM>(td_at.strides, i, s);
        size_t lidx = 0;
        for(size_t j = 0; j < NDIM; j++)
            lidx += s[j] * td_a.strides[j];
        At[i] = A[lidx];
    }
}
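The kernel relies on a multiindex<NDIM> helper that is defined elsewhere in this file and not shown in the diff. A minimal sketch of what it is assumed to do, given how it is called above: split a linear index into NDIM coordinates by repeated division with the packed output strides.

// Assumed behaviour of the multiindex helper (not part of this commit):
// decompose a linear index into per-dimension coordinates using packed strides.
template <int NDIM>
__device__ void multiindex(const size_t* strides, size_t idx, size_t* result)
{
    for(int j = 0; j < NDIM; j++)
    {
        result[j] = idx / strides[j]; // coordinate along dimension j
        idx       = idx % strides[j]; // remainder for the following dimensions
    }
}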
hip_ptr allocate_gpu(std::size_t sz)
{
    void* result;
@@ -67,6 +91,28 @@ migraph::argument from_gpu(migraph::argument arg)
    return result;
}
migraph::argument hip_contiguous(migraph::argument arg, migraph::shape output_shape)
{
    migraph::argument result{output_shape};
    visit_all(result, arg)([&](auto output, auto input) {
        // Build descriptors for the (possibly non-standard) input layout and the
        // packed output layout; the kernel only reads the strides, so lens are
        // left unset. This initial version is hard-wired to 4-D tensors.
        HIPTensorDescriptor<4> td_a, td_at;
        auto s = arg.get_shape();
        for(std::size_t i = 0; i < output_shape.lens().size(); i++)
        {
            td_a.strides[i]  = s.strides().at(i);
            td_at.strides[i] = output_shape.strides().at(i);
        }
        dim3 nblocks(512);
        dim3 nthreads(512);
        hipLaunchKernelGGL((contiguous_gpu<int, 4>),
                           nblocks,
                           nthreads,
                           0,
                           0,
                           input.data(),
                           td_a,
                           output.data(),
                           td_at,
                           s.elements());
    });
    return result;
}
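The same index arithmetic can be sanity-checked on the host without the migraph API. A standalone sketch (illustrative only, not part of this commit) that repacks a column-major 2x3 view into row-major order using identical stride math:

#include <cstdio>
#include <cstddef>

int main()
{
    // A 2x3 matrix stored column-major (a transposed view):
    // logical element (r, c) lives at offset c * 2 + r.
    int A[6]              = {0, 3, 1, 4, 2, 5}; // values laid out column-major
    size_t in_strides[2]  = {1, 2};             // strides of the transposed input
    size_t out_strides[2] = {3, 1};             // packed row-major output strides

    int At[6];
    for(size_t i = 0; i < 6; i++)
    {
        // decompose i with the packed output strides
        size_t coord[2], rem = i;
        for(int d = 0; d < 2; d++)
        {
            coord[d] = rem / out_strides[d];
            rem      = rem % out_strides[d];
        }
        // re-linearize with the input strides
        size_t lidx = coord[0] * in_strides[0] + coord[1] * in_strides[1];
        At[i]       = A[lidx];
    }
    for(int v : At)
        std::printf("%d ", v); // prints: 0 1 2 3 4 5
    std::printf("\n");
}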
} // namespace miopen
} // namespace migraph