Unverified Commit 7596f3f1 authored by Umang Yadav's avatar Umang Yadav Committed by GitHub
Browse files

Avoid registering host buffer ptr multiple times during hip copies (#1245)

Currently, while copying a host buffer to the device, it first registers/maps the host buffer pointer to address space of the device.

If the host buffer has been allocated by the hipHostMalloc then, it is implicitly registered to the device's address space, and no need to register again. This PR adds a check for the same.
parent afdc3051
...@@ -23,13 +23,13 @@ ...@@ -23,13 +23,13 @@
*/ */
#include <migraphx/gpu/hip.hpp> #include <migraphx/gpu/hip.hpp>
#include <migraphx/manage_ptr.hpp> #include <migraphx/manage_ptr.hpp>
#include <migraphx/register_op.hpp> #include <migraphx/register_op.hpp>
#include <migraphx/gpu/context.hpp> #include <migraphx/gpu/context.hpp>
#include <migraphx/gpu/device/contiguous.hpp> #include <migraphx/gpu/device/contiguous.hpp>
#include <miopen/miopen.h> #include <miopen/miopen.h>
#include <memory>
#include <mutex>
#include <vector> #include <vector>
namespace migraphx { namespace migraphx {
...@@ -77,12 +77,38 @@ void* get_device_ptr(void* hptr) ...@@ -77,12 +77,38 @@ void* get_device_ptr(void* hptr)
return result; return result;
} }
hip_ptr allocate_gpu(std::size_t sz, bool host = false) struct host_ptr_cache
{
std::unordered_map<void*, std::weak_ptr<void>> cache;
std::mutex m;
std::shared_ptr<void> get(void* ptr)
{
std::lock_guard<std::mutex> lock(m);
auto it = cache.find(ptr);
if(it != cache.end())
return it->second.lock();
return nullptr;
}
void put(const std::shared_ptr<void>& p)
{
std::lock_guard<std::mutex> lock(m);
cache[p.get()] = p;
}
};
static host_ptr_cache& get_host_ptr_cache()
{
static host_ptr_cache cache;
return cache;
}
std::shared_ptr<void> allocate_gpu(std::size_t sz, bool host = false)
{ {
if(sz > get_available_gpu_memory()) if(sz > get_available_gpu_memory())
MIGRAPHX_THROW("Memory not available to allocate buffer: " + std::to_string(sz)); MIGRAPHX_THROW("Memory not available to allocate buffer: " + std::to_string(sz));
void* result = nullptr; void* alloc_ptr = nullptr;
auto status = host ? hipHostMalloc(&result, sz) : hipMalloc(&result, sz); auto status = host ? hipHostMalloc(&alloc_ptr, sz) : hipMalloc(&alloc_ptr, sz);
if(status != hipSuccess) if(status != hipSuccess)
{ {
if(host) if(host)
...@@ -90,16 +116,28 @@ hip_ptr allocate_gpu(std::size_t sz, bool host = false) ...@@ -90,16 +116,28 @@ hip_ptr allocate_gpu(std::size_t sz, bool host = false)
else else
return allocate_gpu(sz, true); return allocate_gpu(sz, true);
} }
assert(result != nullptr); assert(alloc_ptr != nullptr);
return hip_ptr{result}; std::shared_ptr<void> result = share(hip_ptr{alloc_ptr});
if(host)
{
get_host_ptr_cache().put(result);
}
return result;
} }
hip_host_ptr register_on_gpu(void* ptr, std::size_t sz) std::shared_ptr<void> register_on_gpu(void* ptr, std::size_t sz)
{ {
std::shared_ptr<void> result = get_host_ptr_cache().get(ptr);
if(result)
{
return result;
}
auto status = hipHostRegister(ptr, sz, hipHostRegisterMapped); auto status = hipHostRegister(ptr, sz, hipHostRegisterMapped);
if(status != hipSuccess) if(status != hipSuccess)
MIGRAPHX_THROW("Gpu register failed: " + hip_error(status)); MIGRAPHX_THROW("Gpu register failed: " + hip_error(status));
return hip_host_ptr{ptr}; result = share(hip_host_ptr{ptr});
get_host_ptr_cache().put(result);
return result;
} }
template <class T> template <class T>
...@@ -115,7 +153,7 @@ std::vector<T> read_from_gpu(const void* x, std::size_t sz) ...@@ -115,7 +153,7 @@ std::vector<T> read_from_gpu(const void* x, std::size_t sz)
return result; return result;
} }
hip_ptr write_to_gpu(const void* x, std::size_t sz, bool host = false) std::shared_ptr<void> write_to_gpu(const void* x, std::size_t sz, bool host = false)
{ {
gpu_sync(); gpu_sync();
auto result = allocate_gpu(sz, host); auto result = allocate_gpu(sz, host);
...@@ -137,22 +175,21 @@ hip_ptr write_to_gpu(const T& x) ...@@ -137,22 +175,21 @@ hip_ptr write_to_gpu(const T& x)
argument allocate_gpu(const shape& s, bool host) argument allocate_gpu(const shape& s, bool host)
{ {
auto p = share(allocate_gpu(s.bytes() + 1, host)); auto p = allocate_gpu(s.bytes() + 1, host);
return {s, [p]() mutable { return reinterpret_cast<char*>(p.get()); }}; return {s, [p]() mutable { return reinterpret_cast<char*>(p.get()); }};
} }
argument register_on_gpu(const argument& arg) argument register_on_gpu(const argument& arg)
{ {
auto arg_shared = arg.share(); auto arg_shared = arg.share();
auto p = share(register_on_gpu(arg_shared.data(), arg_shared.get_shape().bytes())); auto p = register_on_gpu(arg_shared.data(), arg_shared.get_shape().bytes());
return {arg_shared.get_shape(), [p, a = std::move(arg_shared)]() mutable { return {arg_shared.get_shape(),
return get_device_ptr(p.get()); [p, a = std::move(arg_shared)]() mutable { return get_device_ptr(p.get()); }};
}}; // namespace gpu }
} // namespace MIGRAPHX_INLINE_NS
argument to_gpu(const argument& arg, bool host) argument to_gpu(const argument& arg, bool host)
{ {
auto p = share(write_to_gpu(arg.data(), arg.get_shape().bytes(), host)); auto p = write_to_gpu(arg.data(), arg.get_shape().bytes(), host);
return {arg.get_shape(), p}; return {arg.get_shape(), p};
} }
......
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include <iostream>
#include <vector>
#include <hip/hip_runtime_api.h>
#include <migraphx/gpu/target.hpp>
#include <migraphx/verify.hpp>
#include <test.hpp>
#include <basic_ops.hpp>
#include <migraphx/gpu/hip.hpp>
#include <migraphx/make_op.hpp>
#define MIGRAPHX_HIP_ASSERT(x) (EXPECT(x == hipSuccess))
TEST_CASE(host_same_buffer_copy)
{
migraphx::program p;
auto* mm = p.get_main_module();
migraphx::shape ss{migraphx::shape::float_type, {4, 2}};
auto a = mm->add_parameter("a", ss);
auto b = mm->add_parameter("b", ss);
auto aa = mm->add_instruction(migraphx::make_op("add"), a, a);
auto gpu_out = mm->add_instruction(migraphx::make_op("hip::copy_from_gpu"), aa);
auto stream_sync = mm->add_instruction(migraphx::make_op("hip::sync_stream"), gpu_out);
auto pass = mm->add_instruction(unary_pass_op{}, stream_sync);
auto alloc = mm->add_instruction(
migraphx::make_op("hip::allocate", {{"shape", migraphx::to_value(ss)}}));
auto gpu_in = mm->add_instruction(migraphx::make_op("hip::copy_to_gpu"), pass, alloc);
auto aab = mm->add_instruction(migraphx::make_op("add"), gpu_in, b);
mm->add_return({aab});
migraphx::parameter_map pp;
std::vector<float> a_vec(ss.elements(), -1);
std::vector<float> b_vec(ss.elements(), 2);
std::vector<float> c_vec(ss.elements(), 0);
pp["a"] = migraphx::argument(ss, a_vec.data());
pp["b"] = migraphx::argument(ss, b_vec.data());
std::vector<float> gpu_result;
migraphx::target gpu_t = migraphx::gpu::target{};
migraphx::compile_options options;
options.offload_copy = true;
p.compile(gpu_t, options);
auto result = p.eval(pp).back();
std::vector<float> results_vector(ss.elements(), -1);
result.visit([&](auto output) { results_vector.assign(output.begin(), output.end()); });
EXPECT(migraphx::verify_range(c_vec, results_vector));
}
TEST_CASE(arguments_lifetime)
{
auto use_on_gpu = [](const migraphx::argument& arg, int c) {
auto* arg_ptr = arg.data();
MIGRAPHX_HIP_ASSERT(hipSetDevice(0));
MIGRAPHX_HIP_ASSERT(hipMemset(arg_ptr, c, arg.get_shape().bytes()));
MIGRAPHX_HIP_ASSERT(hipDeviceSynchronize());
return;
};
auto f = [use_on_gpu](const migraphx::argument& input) {
auto a = migraphx::gpu::register_on_gpu(input);
auto s = a.get_shape();
{
auto b = migraphx::gpu::register_on_gpu(input);
use_on_gpu(b, 0);
std::vector<float> expected_b(s.elements(), 0);
auto gold = migraphx::argument(s, expected_b.data());
}
use_on_gpu(a, 1);
return true;
};
migraphx::shape ss{migraphx::shape::float_type, {4, 2}};
std::vector<float> x_data(ss.elements(), -1);
migraphx::argument x{ss, x_data.data()};
EXPECT(f(x));
}
int main(int argc, const char* argv[]) { test::run(argc, argv); }
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment