Commit b12209b0 authored by Paul
Browse files

Use null stream by default

parent bf2ee683
...@@ -27,7 +27,7 @@ hip_ptr allocate_gpu(std::size_t sz, bool host = false) ...@@ -27,7 +27,7 @@ hip_ptr allocate_gpu(std::size_t sz, bool host = false)
if(sz > get_available_gpu_memory()) if(sz > get_available_gpu_memory())
MIGRAPH_THROW("Memory not available to allocate buffer: " + std::to_string(sz)); MIGRAPH_THROW("Memory not available to allocate buffer: " + std::to_string(sz));
void* result; void* result;
gpu_sync(); // gpu_sync();
auto status = host ? hipHostMalloc(&result, sz) : hipMalloc(&result, sz); auto status = host ? hipHostMalloc(&result, sz) : hipMalloc(&result, sz);
if(status != hipSuccess) if(status != hipSuccess)
{ {
...@@ -36,31 +36,30 @@ hip_ptr allocate_gpu(std::size_t sz, bool host = false) ...@@ -36,31 +36,30 @@ hip_ptr allocate_gpu(std::size_t sz, bool host = false)
else else
allocate_gpu(sz, true); allocate_gpu(sz, true);
} }
gpu_sync(); // gpu_sync();
return hip_ptr{result}; return hip_ptr{result};
} }
template <class T> template <class T>
std::vector<T> read_from_gpu(const void* x, std::size_t sz) std::vector<T> read_from_gpu(const void* x, std::size_t sz)
{ {
std::vector<T> result(sz); std::vector<T> result(sz);
gpu_sync(); // gpu_sync();
auto status = hipMemcpy(result.data(), x, sz * sizeof(T), hipMemcpyDeviceToHost); auto status = hipMemcpy(result.data(), x, sz * sizeof(T), hipMemcpyDeviceToHost);
if(status != hipSuccess) if(status != hipSuccess)
MIGRAPH_THROW("Copy from gpu failed: " + hip_error(status)); // NOLINT MIGRAPH_THROW("Copy from gpu failed: " + hip_error(status)); // NOLINT
gpu_sync(); // gpu_sync();
return result; return result;
} }
hip_ptr write_to_gpu(const void* x, std::size_t sz, bool host = false) hip_ptr write_to_gpu(const void* x, std::size_t sz, bool host = false)
{ {
auto result = allocate_gpu(sz, host); auto result = allocate_gpu(sz, host);
gpu_sync(); // gpu_sync();
auto status = hipMemcpy(result.get(), x, sz, hipMemcpyHostToDevice); auto status = hipMemcpy(result.get(), x, sz, hipMemcpyHostToDevice);
if(status != hipSuccess) if(status != hipSuccess)
MIGRAPH_THROW("Copy to gpu failed: " + hip_error(status)); MIGRAPH_THROW("Copy to gpu failed: " + hip_error(status));
gpu_sync(); // gpu_sync();
return result; return result;
} }
......
...@@ -4,10 +4,13 @@ ...@@ -4,10 +4,13 @@
#include <migraph/gpu/miopen.hpp> #include <migraph/gpu/miopen.hpp>
#include <migraph/gpu/rocblas.hpp> #include <migraph/gpu/rocblas.hpp>
#include <migraph/gpu/hip.hpp> #include <migraph/gpu/hip.hpp>
#include <migraph/env.hpp>
namespace migraph { namespace migraph {
namespace gpu { namespace gpu {
MIGRAPH_DECLARE_ENV_VAR(MIGRAPH_DISABLE_NULL_STREAM)
struct hip_device struct hip_device
{ {
hip_device() { add_stream(); } hip_device() { add_stream(); }
...@@ -31,20 +34,33 @@ struct hip_device ...@@ -31,20 +34,33 @@ struct hip_device
return hip_stream_ptr{result}; return hip_stream_ptr{result};
} }
auto get() hipStream_t get()
{ {
if(enabled(MIGRAPH_DISABLE_NULL_STREAM{}))
{
set_device(id); set_device(id);
if(s == nullptr) if(s == nullptr)
s = create_stream(); s = create_stream();
assert(s.get() != nullptr); assert(s.get() != nullptr);
return s.get(); return s.get();
}
return nullptr;
}
auto create_miopen_handle()
{
if(enabled(MIGRAPH_DISABLE_NULL_STREAM{}))
return make_obj<miopen_handle>(&miopenCreateWithStream, get());
else
return make_obj<miopen_handle>(&miopenCreate);
} }
auto get_miopen() auto get_miopen()
{ {
set_device(id); set_device(id);
if(mihandle == nullptr) if(mihandle == nullptr)
mihandle = make_obj<miopen_handle>(&miopenCreateWithStream, get()); mihandle = create_miopen_handle();
assert(mihandle.get() != nullptr); assert(mihandle.get() != nullptr);
return mihandle.get(); return mihandle.get();
} }
......
...@@ -129,6 +129,7 @@ template <class V> ...@@ -129,6 +129,7 @@ template <class V>
void verify_program() void verify_program()
{ {
auto_print::set_terminate_handler(migraph::get_type_name<V>()); auto_print::set_terminate_handler(migraph::get_type_name<V>());
// std::cout << migraph::get_type_name<V>() << std::endl;
migraph::program cpu_prog; migraph::program cpu_prog;
migraph::program gpu_prog; migraph::program gpu_prog;
auto cpu_arg_f = detach_async([&] { return run_cpu<V>(cpu_prog); }); auto cpu_arg_f = detach_async([&] { return run_cpu<V>(cpu_prog); });
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment