Fix out-of-bounds access when generate uses nonpacked tensors (#1160)

Fix an out-of-bounds access when generate uses nonpacked tensors, and add some additional asserts for GPU memory.

Fix out-of-bounds access when generate uses nonpacked tensors (#1160)
Fix an out-of-bounds access when generate uses nonpacked tensors, and add some additional asserts for GPU memory.
262ba721 · Paul Fultz II · GitHub · 88b3dd34 · 262ba721 · 262ba721
Unverified Commit 262ba721 authored Apr 12, 2022 by Paul Fultz II Committed by GitHub Apr 12, 2022
Hide whitespace changes
Inline Side-by-side

Showing with 20 additions and 6 deletions

src/include/migraphx/generate.hpp src/include/migraphx/generate.hpp +4 -4

src/targets/gpu/hip.cpp src/targets/gpu/hip.cpp +16 -2

No files found.
--- a/src/include/migraphx/generate.hpp
+++ b/src/include/migraphx/generate.hpp
@@ -88,16 +88,16 @@ struct xorshift_generator
 template <class T>
 auto generate_tensor_data(const migraphx::shape& s, unsigned long seed = 0)
 {
-    auto result = make_shared_array<T>(s.elements());
+    auto result = make_shared_array<T>(s.element_space());
-    std::generate(result.get(), result.get() + s.elements(), xorshf96_generator<T>{seed});
+    std::generate(result.get(), result.get() + s.element_space(), xorshf96_generator<T>{seed});
    return result;
 }
 template <class T>
 auto fill_tensor_data(const migraphx::shape& s, unsigned long value = 0)
 {
-    auto result = make_shared_array<T>(s.elements());
+    auto result = make_shared_array<T>(s.element_space());
-    std::generate(result.get(), result.get() + s.elements(), [=] { return value; });
+    std::generate(result.get(), result.get() + s.element_space(), [=] { return value; });
    return result;
 }

--- a/src/targets/gpu/hip.cpp
+++ b/src/targets/gpu/hip.cpp
@@ -27,6 +27,15 @@ using hip_host_ptr = MIGRAPHX_MANAGE_PTR(void, hipHostUnregister);
 std::string hip_error(int error) { return hipGetErrorString(static_cast<hipError_t>(error)); }
+bool is_device_ptr(const void* ptr)
+{
+    hipPointerAttribute_t attr;
+    auto status = hipPointerGetAttributes(&attr, ptr);
+    if(status != hipSuccess)
+        return false;
+    return attr.memoryType == hipMemoryTypeDevice;
+}
 std::size_t get_available_gpu_memory()
 {
    size_t free;
@@ -50,8 +59,8 @@ hip_ptr allocate_gpu(std::size_t sz, bool host = false)
 {
    if(sz > get_available_gpu_memory())
        MIGRAPHX_THROW("Memory not available to allocate buffer: " + std::to_string(sz));
-    void* result;
+    void* result = nullptr;
-    auto status = host ? hipHostMalloc(&result, sz) : hipMalloc(&result, sz);
+    auto status  = host ? hipHostMalloc(&result, sz) : hipMalloc(&result, sz);
    if(status != hipSuccess)
    {
        if(host)
@@ -59,6 +68,7 @@ hip_ptr allocate_gpu(std::size_t sz, bool host = false)
        else
            return allocate_gpu(sz, true);
    }
+    assert(result != nullptr);
    return hip_ptr{result};
 }
@@ -75,6 +85,8 @@ std::vector<T> read_from_gpu(const void* x, std::size_t sz)
 {
    gpu_sync();
    std::vector<T> result(sz);
+    assert(not is_device_ptr(result.data()));
+    assert(is_device_ptr(x));
    auto status = hipMemcpy(result.data(), x, sz * sizeof(T), hipMemcpyDeviceToHost);
    if(status != hipSuccess)
        MIGRAPHX_THROW("Copy from gpu failed: " + hip_error(status)); // NOLINT
@@ -85,6 +97,8 @@ hip_ptr write_to_gpu(const void* x, std::size_t sz, bool host = false)
 {
    gpu_sync();
    auto result = allocate_gpu(sz, host);
+    assert(is_device_ptr(result.get()));
+    assert(not is_device_ptr(x));
    auto status = hipMemcpy(result.get(), x, sz, hipMemcpyHostToDevice);
    if(status != hipSuccess)
        MIGRAPHX_THROW("Copy to gpu failed: " + hip_error(status));