Commit 8301d8dd authored by Qianfeng Zhang's avatar Qianfeng Zhang
Browse files

Revert "Add persistent_host_memory_allocator to allocate host memory used by...

Revert "Add persistent_host_memory_allocator to allocate host memory used by H2D/D2H transfer in hipGraph capturing"

This reverts commit 69a6dc74.
parent 69a6dc74
......@@ -19,7 +19,6 @@ Gemm + Softmax + Gemm fused operation. Computes C_g_m_o = Softmax(A_g_m_k * B0_g
#include "ck/ck.hpp"
#include "ck/tensor_operation/gpu/device/gemm_specialization.hpp"
#include "ck/tensor_operation/gpu/device/tensor_specialization.hpp"
#include "ck/tensor_operation/gpu/device/persistent_host_memory_allocator.hpp"
#include "ck/tensor_operation/gpu/device/impl/device_grouped_mha_fwd_xdl_cshuffle_v2.hpp"
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
......
......@@ -349,9 +349,6 @@ int run(int argc, char* argv[])
HIP_CHECK_ERROR(hipStreamSynchronize(stream));
ck::tensor_operation::device::getPersistentHostMemoryAllocatorPtr()->releaseWithStream(
stream);
for(std::size_t i = 0; i < group_count; i++)
{
const int& G0 = g0_g1_m_n_k_o[i][0];
......
......@@ -15,7 +15,6 @@
#include "ck/tensor_operation/gpu/device/device_grouped_gemm_softmax_gemm_permute.hpp"
#include "ck/tensor_operation/gpu/device/gemm_specialization.hpp"
#include "ck/tensor_operation/gpu/device/matrix_padder.hpp"
#include "ck/tensor_operation/gpu/device/persistent_host_memory_allocator.hpp"
#include "ck/tensor_operation/gpu/grid/gridwise_batched_mha_fwd_xdl_cshuffle_v2.hpp"
#include "ck/tensor_operation/operator_transform/transform_contraction_to_gemm.hpp"
#include "ck/host_utility/device_prop.hpp"
......@@ -924,8 +923,8 @@ struct DeviceGroupedMultiheadAttentionForward_Xdl_CShuffle_V2
size_t copy_size = arg.group_kernel_args_.size() * sizeof(GroupKernelArg);
void* persistent_ptr = getPersistentHostMemoryAllocatorPtr()->allocate(
copy_size, stream_config.stream_id_);
// ToDO: when to release this memory buffer?
char* persistent_ptr = new char[copy_size];
(void)std::memcpy(persistent_ptr, arg.group_kernel_args_.data(), copy_size);
......
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
#include <mutex>
#include <map>
#include <vector>
#include <ck/ck.hpp>
namespace ck {
namespace tensor_operation {
namespace device {
// TODO: move the implementation to a .cpp file
// Allocator used for allocating persistent host memory buffers used as src/dst for
// H2D or D2H transfers; host memory persistence is required when hipGraph is used.
class PersistentHostMemoryAllocator
{
private:
static PersistentHostMemoryAllocator* singleton_;
std::map<hipStream_t, std::vector<char*>> buffers_;
std::mutex mtx_;
protected:
PersistentHostMemoryAllocator(){};
public:
void* allocate(size_t sizeInBytes, hipStream_t stream)
{
std::lock_guard<std::mutex> lck(mtx_);
auto it = buffers_.find(stream);
if(it != buffers_.end())
{
char* new_buf = new char[sizeInBytes];
it->second.push_back(new_buf);
return new_buf;
}
else
{
// allocate a buffer and keep it for the stream
char* new_buf = new char[sizeInBytes];
std::vector<char*> tmp_vec = {new_buf};
buffers_.insert(std::make_pair(stream, tmp_vec));
return new_buf;
};
};
void releaseWithStream(hipStream_t stream)
{
std::lock_guard<std::mutex> lck(mtx_);
auto it = buffers_.find(stream);
if(it != buffers_.end())
{
for(auto buf : it->second)
delete[] buf;
it->second.clear();
}
};
void releaseAll()
{
std::lock_guard<std::mutex> lck(mtx_);
auto it = buffers_.begin();
while(it != buffers_.end())
{
for(auto buf : it->second)
delete[] buf;
it->second.clear();
++it;
}
};
static PersistentHostMemoryAllocator* getPersistentHostMemoryAllocatorPtr()
{
if(singleton_ == nullptr)
singleton_ = new PersistentHostMemoryAllocator();
return singleton_;
};
PersistentHostMemoryAllocator(const PersistentHostMemoryAllocator&) = delete;
PersistentHostMemoryAllocator(PersistentHostMemoryAllocator&&) = delete;
PersistentHostMemoryAllocator& operator=(const PersistentHostMemoryAllocator&) = delete;
PersistentHostMemoryAllocator& operator=(PersistentHostMemoryAllocator&&) = delete;
};
PersistentHostMemoryAllocator* PersistentHostMemoryAllocator::singleton_ = nullptr;
// TODO: move this helper to a .cpp file
static PersistentHostMemoryAllocator* getPersistentHostMemoryAllocatorPtr()
{
return PersistentHostMemoryAllocator::getPersistentHostMemoryAllocatorPtr();
};
} // namespace device
} // namespace tensor_operation
} // namespace ck
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment