Commit bf2ee683 authored by Paul's avatar Paul
Browse files

Add more syncs

parent 088dc5eb
...@@ -27,6 +27,7 @@ hip_ptr allocate_gpu(std::size_t sz, bool host = false) ...@@ -27,6 +27,7 @@ hip_ptr allocate_gpu(std::size_t sz, bool host = false)
if(sz > get_available_gpu_memory()) if(sz > get_available_gpu_memory())
MIGRAPH_THROW("Memory not available to allocate buffer: " + std::to_string(sz)); MIGRAPH_THROW("Memory not available to allocate buffer: " + std::to_string(sz));
void* result; void* result;
gpu_sync();
auto status = host ? hipHostMalloc(&result, sz) : hipMalloc(&result, sz); auto status = host ? hipHostMalloc(&result, sz) : hipMalloc(&result, sz);
if(status != hipSuccess) if(status != hipSuccess)
{ {
...@@ -39,19 +40,12 @@ hip_ptr allocate_gpu(std::size_t sz, bool host = false) ...@@ -39,19 +40,12 @@ hip_ptr allocate_gpu(std::size_t sz, bool host = false)
return hip_ptr{result}; return hip_ptr{result};
} }
// Upload a contiguous host container (anything exposing value_type,
// size() and data(), e.g. std::vector) into freshly allocated GPU
// memory, returning the owning device pointer.
template <class T>
hip_ptr write_to_gpu(const T& x)
{
    using value_type  = typename T::value_type;
    const auto nbytes = sizeof(value_type) * x.size();
    // Forward to the raw-pointer overload with the container's byte count.
    return write_to_gpu(x.data(), nbytes);
}
template <class T> template <class T>
std::vector<T> read_from_gpu(const void* x, std::size_t sz) std::vector<T> read_from_gpu(const void* x, std::size_t sz)
{ {
gpu_sync();
std::vector<T> result(sz); std::vector<T> result(sz);
gpu_sync();
auto status = hipMemcpy(result.data(), x, sz * sizeof(T), hipMemcpyDeviceToHost); auto status = hipMemcpy(result.data(), x, sz * sizeof(T), hipMemcpyDeviceToHost);
if(status != hipSuccess) if(status != hipSuccess)
MIGRAPH_THROW("Copy from gpu failed: " + hip_error(status)); // NOLINT MIGRAPH_THROW("Copy from gpu failed: " + hip_error(status)); // NOLINT
...@@ -61,8 +55,8 @@ std::vector<T> read_from_gpu(const void* x, std::size_t sz) ...@@ -61,8 +55,8 @@ std::vector<T> read_from_gpu(const void* x, std::size_t sz)
hip_ptr write_to_gpu(const void* x, std::size_t sz, bool host = false) hip_ptr write_to_gpu(const void* x, std::size_t sz, bool host = false)
{ {
gpu_sync();
auto result = allocate_gpu(sz, host); auto result = allocate_gpu(sz, host);
gpu_sync();
auto status = hipMemcpy(result.get(), x, sz, hipMemcpyHostToDevice); auto status = hipMemcpy(result.get(), x, sz, hipMemcpyHostToDevice);
if(status != hipSuccess) if(status != hipSuccess)
MIGRAPH_THROW("Copy to gpu failed: " + hip_error(status)); MIGRAPH_THROW("Copy to gpu failed: " + hip_error(status));
...@@ -70,6 +64,14 @@ hip_ptr write_to_gpu(const void* x, std::size_t sz, bool host = false) ...@@ -70,6 +64,14 @@ hip_ptr write_to_gpu(const void* x, std::size_t sz, bool host = false)
return result; return result;
} }
// Copy the contents of a contiguous host container (e.g. std::vector)
// into newly allocated GPU memory.
// T must provide value_type, size(), and data() over contiguous storage.
// Returns an owning hip_ptr to the device buffer.
template <class T>
hip_ptr write_to_gpu(const T& x)
{
    using type = typename T::value_type;
    // Total transfer size in bytes: element count times element size.
    auto size = x.size() * sizeof(type);
    // Delegate to the raw-pointer overload, which allocates and copies.
    return write_to_gpu(x.data(), size);
}
argument allocate_gpu(const shape& s, bool host) argument allocate_gpu(const shape& s, bool host)
{ {
auto p = share(allocate_gpu(s.bytes() + 1, host)); auto p = share(allocate_gpu(s.bytes() + 1, host));
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment