Commit bf2ee683 authored by Paul's avatar Paul
Browse files

Add more syncs

parent 088dc5eb
...@@ -27,6 +27,7 @@ hip_ptr allocate_gpu(std::size_t sz, bool host = false) ...@@ -27,6 +27,7 @@ hip_ptr allocate_gpu(std::size_t sz, bool host = false)
if(sz > get_available_gpu_memory()) if(sz > get_available_gpu_memory())
MIGRAPH_THROW("Memory not available to allocate buffer: " + std::to_string(sz)); MIGRAPH_THROW("Memory not available to allocate buffer: " + std::to_string(sz));
void* result; void* result;
gpu_sync();
auto status = host ? hipHostMalloc(&result, sz) : hipMalloc(&result, sz); auto status = host ? hipHostMalloc(&result, sz) : hipMalloc(&result, sz);
if(status != hipSuccess) if(status != hipSuccess)
{ {
...@@ -39,19 +40,12 @@ hip_ptr allocate_gpu(std::size_t sz, bool host = false) ...@@ -39,19 +40,12 @@ hip_ptr allocate_gpu(std::size_t sz, bool host = false)
return hip_ptr{result}; return hip_ptr{result};
} }
// Upload a contiguous host container (anything exposing value_type,
// size() and data(), e.g. std::vector) into freshly allocated GPU
// memory, returning the owning device pointer.
template <class T>
hip_ptr write_to_gpu(const T& x)
{
    using value_type  = typename T::value_type;
    const auto nbytes = sizeof(value_type) * x.size();
    // Forward to the raw-pointer overload with the container's byte count.
    return write_to_gpu(x.data(), nbytes);
}
template <class T> template <class T>
std::vector<T> read_from_gpu(const void* x, std::size_t sz) std::vector<T> read_from_gpu(const void* x, std::size_t sz)
{ {
gpu_sync();
std::vector<T> result(sz); std::vector<T> result(sz);
gpu_sync();
auto status = hipMemcpy(result.data(), x, sz * sizeof(T), hipMemcpyDeviceToHost); auto status = hipMemcpy(result.data(), x, sz * sizeof(T), hipMemcpyDeviceToHost);
if(status != hipSuccess) if(status != hipSuccess)
MIGRAPH_THROW("Copy from gpu failed: " + hip_error(status)); // NOLINT MIGRAPH_THROW("Copy from gpu failed: " + hip_error(status)); // NOLINT
...@@ -61,8 +55,8 @@ std::vector<T> read_from_gpu(const void* x, std::size_t sz) ...@@ -61,8 +55,8 @@ std::vector<T> read_from_gpu(const void* x, std::size_t sz)
hip_ptr write_to_gpu(const void* x, std::size_t sz, bool host = false) hip_ptr write_to_gpu(const void* x, std::size_t sz, bool host = false)
{ {
gpu_sync();
auto result = allocate_gpu(sz, host); auto result = allocate_gpu(sz, host);
gpu_sync();
auto status = hipMemcpy(result.get(), x, sz, hipMemcpyHostToDevice); auto status = hipMemcpy(result.get(), x, sz, hipMemcpyHostToDevice);
if(status != hipSuccess) if(status != hipSuccess)
MIGRAPH_THROW("Copy to gpu failed: " + hip_error(status)); MIGRAPH_THROW("Copy to gpu failed: " + hip_error(status));
...@@ -70,6 +64,14 @@ hip_ptr write_to_gpu(const void* x, std::size_t sz, bool host = false) ...@@ -70,6 +64,14 @@ hip_ptr write_to_gpu(const void* x, std::size_t sz, bool host = false)
return result; return result;
} }
// Copy the contents of a contiguous host container (e.g. std::vector)
// into newly allocated GPU memory.
// T must provide value_type, size(), and data() over contiguous storage.
// Returns an owning hip_ptr to the device buffer.
template <class T>
hip_ptr write_to_gpu(const T& x)
{
    using type = typename T::value_type;
    // Total transfer size in bytes: element count times element size.
    auto size = x.size() * sizeof(type);
    // Delegate to the raw-pointer overload, which allocates and copies.
    return write_to_gpu(x.data(), size);
}
argument allocate_gpu(const shape& s, bool host) argument allocate_gpu(const shape& s, bool host)
{ {
auto p = share(allocate_gpu(s.bytes() + 1, host)); auto p = share(allocate_gpu(s.bytes() + 1, host));
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment