Unverified commit 5d87c20f, authored by Lyu Han, committed by GitHub
Browse files

Fix memory leak (#488)

* Fix memory leak

* modern c++
parent 97dcdff7
......@@ -72,6 +72,10 @@ LlamaWeight<T>::~LlamaWeight()
pre_decoder_embedding_table = nullptr;
post_decoder_embedding_kernel = nullptr;
for (auto& p : decoder_layer_weights) {
delete p;
}
}
template<typename T>
......
......@@ -249,13 +249,13 @@ std::unique_ptr<LlamaTritonSharedModelInstance<T>> LlamaTritonModel<T>::createSh
cuda_device_prop_ptr.get());
return std::make_unique<LlamaTritonSharedModelInstance<T>>(
LlamaTritonSharedModelInstance<T>{std::move(llama),
shared_weights_[device_id],
std::move(allocator),
LlamaTritonSharedModelInstance<T>{std::move(allocator),
std::move(cublas_algo_map),
std::move(cublas_wrapper_mutex),
std::move(cublas_wrapper),
std::move(cuda_device_prop_ptr),
shared_weights_[device_id],
std::move(llama),
session_len_});
}
......
......@@ -29,13 +29,13 @@ namespace ft = turbomind;
// Bundle of the objects that together make up one shared Llama model instance:
// the model itself, its weights, a CUDA allocator, the cuBLAS helper objects,
// the device properties and the session length.
//
// NOTE(review): this listing looks like a diff whose +/- markers were stripped:
// `llm` and `llm_weight` are declared twice below, once at the top and once
// after `cuda_device_prop_ptr`. Presumably the first pair is the pre-change
// position and the second pair is the post-change position -- confirm against
// the real header before relying on either order.
//
// C++ destroys non-static data members in reverse order of declaration, so the
// position of `llm` / `llm_weight` relative to `allocator` and the cuBLAS
// members decides which of them is torn down first.
template<typename T>
struct LlamaTritonSharedModelInstance {
// Presumably the pre-change position of the model and weights -- TODO confirm.
std::unique_ptr<ft::LlamaV2<T>> llm;
std::shared_ptr<ft::LlamaWeight<T>> llm_weight;
// Exclusively owned CUDA allocator and cuBLAS support objects.
std::unique_ptr<ft::Allocator<ft::AllocatorType::CUDA>> allocator;
std::unique_ptr<ft::cublasAlgoMap> cublas_algo_map;
std::unique_ptr<std::mutex> cublas_wrapper_mutex;
std::unique_ptr<ft::cublasMMWrapper> cublas_wrapper;
std::unique_ptr<cudaDeviceProp> cuda_device_prop_ptr;
// Presumably the post-change position: declared after the allocator and cuBLAS
// members, so these two are destroyed before them -- TODO confirm.
// The weights are held through a shared_ptr, i.e. ownership may be shared.
std::shared_ptr<ft::LlamaWeight<T>> llm_weight;
std::unique_ptr<ft::LlamaV2<T>> llm;
// Session length; const, so it is fixed when the struct is initialized.
const int session_len;
};
......
......@@ -271,6 +271,7 @@ struct AbstractTransformerModel;
struct AbstractTransformerModelInstance;
struct AbstractTransformerModelInstance {
virtual ~AbstractTransformerModelInstance() = default;
virtual std::shared_ptr<std::vector<triton::Tensor>>
forward(std::shared_ptr<std::vector<triton::Tensor>> input_tensors) = 0;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment