"megatron/git@developer.sourcefind.cn:OpenDAS/megatron-lm.git" did not exist on "12518332df3797ae1213102c95a1bccbf04c324d"
Commit 5f68a283 authored by wsttiger's avatar wsttiger
Browse files

Added HIP (rocm v. 1.8.2) to deps; added device library

parent e85e0fa1
pfultz2/rocm-recipes pfultz2/rocm-recipes
danmar/cppcheck@d9f9bdda7344e80585f71141be7797055d7987f3 danmar/cppcheck@d9f9bdda7344e80585f71141be7797055d7987f3
ROCm-Developer-Tools/HIP@3a41f286203968421c557338d6fb39c36f3c717c
# python/cpython@v3.6.6 -X autotools -H sha256:92aa914572c695c0aeb01b0a214813f414da4b51a371234df514a74761f2bb36 # python/cpython@v3.6.6 -X autotools -H sha256:92aa914572c695c0aeb01b0a214813f414da4b51a371234df514a74761f2bb36
-f requirements.txt -f requirements.txt
...@@ -13,3 +13,13 @@ add_library(migraph_miopen ...@@ -13,3 +13,13 @@ add_library(migraph_miopen
rocm_clang_tidy_check(migraph_miopen) rocm_clang_tidy_check(migraph_miopen)
target_link_libraries(migraph_miopen migraph MIOpen) target_link_libraries(migraph_miopen migraph MIOpen)
target_include_directories(migraph_miopen PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>) target_include_directories(migraph_miopen PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>)
add_library(migraph_device
kernels.cu
)
rocm_clang_tidy_check(migraph_device)
target_link_libraries(migraph_device migraph hip::device)
target_include_directories(migraph_device PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>)
...@@ -11,30 +11,6 @@ namespace miopen { ...@@ -11,30 +11,6 @@ namespace miopen {
using hip_ptr = MIGRAPH_MANAGE_PTR(void, hipFree); using hip_ptr = MIGRAPH_MANAGE_PTR(void, hipFree);
template <int NDIM>
struct HIPTensorDescriptor
{
size_t lens[NDIM];
size_t strides[NDIM];
};
template <typename T, int NDIM>
__global__
void contiguous_gpu(const T* A,
HIPTensorDescriptor<NDIM> td_a,
T* At,
HIPTensorDescriptor<NDIM> td_at,
size_t nelements) {
for (size_t i = blockIdx.x * blockDim.x + threadIdx.x;
i < nelements; i += blockDim.x * gridDim.x) {
size_t s[NDIM];
multiindex<NDIM>(td_at.strides, i, s);
size_t lidx = 0;
for (size_t j = 0; j < NDIM; j++) lidx += s[j] * td_a.strides[j];
At[i] = A[lidx];
}
}
hip_ptr allocate_gpu(std::size_t sz) hip_ptr allocate_gpu(std::size_t sz)
{ {
void* result; void* result;
...@@ -91,28 +67,6 @@ migraph::argument from_gpu(migraph::argument arg) ...@@ -91,28 +67,6 @@ migraph::argument from_gpu(migraph::argument arg)
return result; return result;
} }
migraph::argument hip_contiguous(migraph::argument arg, migraph::shape output_shape)
{
migraph::argument result{output_shape};
visit_all(result, arg)([&](auto output, auto input) {
HIPTensorDescriptor td_a, td_at;
auto s = arg.get_shape();
for (int i = 0; i < output_shape.lens().size(); i++) {
td_a.strides[i] = s.strides().at(i);
td_at.strides[i] = output_shape.strides().at(i);
}
dim3 nblocks(512);
dim3 nthreads(512);
hipLaunchKernelGGL((contiguous_gpu<int, 4>), nblocks, nthreads, 0, 0,
input.data(),
td_a,
output.data(),
td_at,
s.elements());
});
return result;
}
} // namespace miopen } // namespace miopen
} // namespace migraph } // namespace migraph
#ifndef MIGRAPH_GUARD_MIGRAPHLIB_KERNELS_HPP
#define MIGRAPH_GUARD_MIGRAPHLIB_KERNELS_HPP
namespace migraph {
namespace miopen {
migraph::argument hip_contiguous(migraph::argument arg, migraph::shape output_shape);
} // namespace miopen
} // namespace migraph
#endif
#include <hip/hip_runtime.h>
#include <migraph/operators.hpp>
namespace migraph {
namespace miopen {
template <int NDIM>
struct HIPTensorDescriptor
{
size_t lens[NDIM];
size_t strides[NDIM];
};
template <int NDIM>
__host__ __device__ void multiindex(size_t (&strides)[NDIM], size_t idx, size_t* result)
{
size_t tidx = idx;
for(size_t is = 0; is < NDIM; is++)
{
result[is] = tidx / strides[is];
tidx = tidx % strides[is];
}
}
template <typename T, int NDIM>
__global__ void contiguous_gpu(const T* A,
HIPTensorDescriptor<NDIM> td_a,
T* At,
HIPTensorDescriptor<NDIM> td_at,
size_t nelements)
{
for(size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < nelements;
i += blockDim.x * gridDim.x)
{
size_t s[NDIM];
multiindex<NDIM>(td_at.strides, i, s);
size_t lidx = 0;
for(size_t j = 0; j < NDIM; j++)
lidx += s[j] * td_a.strides[j];
At[i] = A[lidx];
}
}
migraph::argument hip_contiguous(migraph::argument arg, migraph::shape output_shape)
{
migraph::argument result{output_shape};
size_t ndim = output_shape.lens().size();
visit_all(result, arg)([&](auto output, auto input) {
if(ndim == 4)
{
HIPTensorDescriptor<4> td_a, td_at;
auto s = arg.get_shape();
for(int i = 0; i < output_shape.lens().size(); i++)
{
td_a.strides[i] = s.strides().at(i);
td_at.strides[i] = output_shape.strides().at(i);
}
dim3 nblocks(512);
dim3 nthreads(512);
hipLaunchKernelGGL((contiguous_gpu<int, 4>),
nblocks,
nthreads,
0,
0,
input.data(),
td_a,
output.data(),
td_at,
s.elements());
}
else
{
MIGRAPH_THROW("contiguous is only valid for 4D tensors");
}
});
return result;
}
} // namespace miopen
} // namespace migraph
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment