Commit 8301d8dd authored by Qianfeng Zhang's avatar Qianfeng Zhang
Browse files

Revert "Add persistent_host_memory_allocator to allocate host memory used by...

Revert "Add persistent_host_memory_allocator to allocate host memory used by H2D/D2H transfer in hipGraph capturing"

This reverts commit 69a6dc74.
parent 69a6dc74
......@@ -19,7 +19,6 @@ Gemm + Softmax + Gemm fused operation. Computes C_g_m_o = Softmax(A_g_m_k * B0_g
#include "ck/ck.hpp"
#include "ck/tensor_operation/gpu/device/gemm_specialization.hpp"
#include "ck/tensor_operation/gpu/device/tensor_specialization.hpp"
#include "ck/tensor_operation/gpu/device/persistent_host_memory_allocator.hpp"
#include "ck/tensor_operation/gpu/device/impl/device_grouped_mha_fwd_xdl_cshuffle_v2.hpp"
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
......
......@@ -349,9 +349,6 @@ int run(int argc, char* argv[])
HIP_CHECK_ERROR(hipStreamSynchronize(stream));
ck::tensor_operation::device::getPersistentHostMemoryAllocatorPtr()->releaseWithStream(
stream);
for(std::size_t i = 0; i < group_count; i++)
{
const int& G0 = g0_g1_m_n_k_o[i][0];
......
......@@ -15,7 +15,6 @@
#include "ck/tensor_operation/gpu/device/device_grouped_gemm_softmax_gemm_permute.hpp"
#include "ck/tensor_operation/gpu/device/gemm_specialization.hpp"
#include "ck/tensor_operation/gpu/device/matrix_padder.hpp"
#include "ck/tensor_operation/gpu/device/persistent_host_memory_allocator.hpp"
#include "ck/tensor_operation/gpu/grid/gridwise_batched_mha_fwd_xdl_cshuffle_v2.hpp"
#include "ck/tensor_operation/operator_transform/transform_contraction_to_gemm.hpp"
#include "ck/host_utility/device_prop.hpp"
......@@ -924,8 +923,8 @@ struct DeviceGroupedMultiheadAttentionForward_Xdl_CShuffle_V2
size_t copy_size = arg.group_kernel_args_.size() * sizeof(GroupKernelArg);
void* persistent_ptr = getPersistentHostMemoryAllocatorPtr()->allocate(
copy_size, stream_config.stream_id_);
// ToDO: when to release this memory buffer?
char* persistent_ptr = new char[copy_size];
(void)std::memcpy(persistent_ptr, arg.group_kernel_args_.data(), copy_size);
......
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
#include <mutex>
#include <map>
#include <vector>
#include <ck/ck.hpp>
namespace ck {
namespace tensor_operation {
namespace device {
// TODO: move the implementation to a .cpp file
// Allocator used for allocating persistent host memory buffers used as src/dst for
// H2D or D2H transfers; host memory persistence is required when hipGraph is used.
class PersistentHostMemoryAllocator
{
private:
static PersistentHostMemoryAllocator* singleton_;
std::map<hipStream_t, std::vector<char*>> buffers_;
std::mutex mtx_;
protected:
PersistentHostMemoryAllocator(){};
public:
void* allocate(size_t sizeInBytes, hipStream_t stream)
{
std::lock_guard<std::mutex> lck(mtx_);
auto it = buffers_.find(stream);
if(it != buffers_.end())
{
char* new_buf = new char[sizeInBytes];
it->second.push_back(new_buf);
return new_buf;
}
else
{
// allocate a buffer and keep it for the stream
char* new_buf = new char[sizeInBytes];
std::vector<char*> tmp_vec = {new_buf};
buffers_.insert(std::make_pair(stream, tmp_vec));
return new_buf;
};
};
void releaseWithStream(hipStream_t stream)
{
std::lock_guard<std::mutex> lck(mtx_);
auto it = buffers_.find(stream);
if(it != buffers_.end())
{
for(auto buf : it->second)
delete[] buf;
it->second.clear();
}
};
void releaseAll()
{
std::lock_guard<std::mutex> lck(mtx_);
auto it = buffers_.begin();
while(it != buffers_.end())
{
for(auto buf : it->second)
delete[] buf;
it->second.clear();
++it;
}
};
static PersistentHostMemoryAllocator* getPersistentHostMemoryAllocatorPtr()
{
if(singleton_ == nullptr)
singleton_ = new PersistentHostMemoryAllocator();
return singleton_;
};
PersistentHostMemoryAllocator(const PersistentHostMemoryAllocator&) = delete;
PersistentHostMemoryAllocator(PersistentHostMemoryAllocator&&) = delete;
PersistentHostMemoryAllocator& operator=(const PersistentHostMemoryAllocator&) = delete;
PersistentHostMemoryAllocator& operator=(PersistentHostMemoryAllocator&&) = delete;
};
PersistentHostMemoryAllocator* PersistentHostMemoryAllocator::singleton_ = nullptr;
// TODO: move this helper to a .cpp file
static PersistentHostMemoryAllocator* getPersistentHostMemoryAllocatorPtr()
{
return PersistentHostMemoryAllocator::getPersistentHostMemoryAllocatorPtr();
};
} // namespace device
} // namespace tensor_operation
} // namespace ck
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment