Unverified commit 5d87c20f, authored by Lyu Han, committed by GitHub
Browse files

Fix memory leak (#488)

* Fix memory leak

* modern c++
parent 97dcdff7
...@@ -72,6 +72,10 @@ LlamaWeight<T>::~LlamaWeight() ...@@ -72,6 +72,10 @@ LlamaWeight<T>::~LlamaWeight()
pre_decoder_embedding_table = nullptr; pre_decoder_embedding_table = nullptr;
post_decoder_embedding_kernel = nullptr; post_decoder_embedding_kernel = nullptr;
for (auto& p : decoder_layer_weights) {
delete p;
}
} }
template<typename T> template<typename T>
......
...@@ -249,13 +249,13 @@ std::unique_ptr<LlamaTritonSharedModelInstance<T>> LlamaTritonModel<T>::createSh ...@@ -249,13 +249,13 @@ std::unique_ptr<LlamaTritonSharedModelInstance<T>> LlamaTritonModel<T>::createSh
cuda_device_prop_ptr.get()); cuda_device_prop_ptr.get());
return std::make_unique<LlamaTritonSharedModelInstance<T>>( return std::make_unique<LlamaTritonSharedModelInstance<T>>(
LlamaTritonSharedModelInstance<T>{std::move(llama), LlamaTritonSharedModelInstance<T>{std::move(allocator),
shared_weights_[device_id],
std::move(allocator),
std::move(cublas_algo_map), std::move(cublas_algo_map),
std::move(cublas_wrapper_mutex), std::move(cublas_wrapper_mutex),
std::move(cublas_wrapper), std::move(cublas_wrapper),
std::move(cuda_device_prop_ptr), std::move(cuda_device_prop_ptr),
shared_weights_[device_id],
std::move(llama),
session_len_}); session_len_});
} }
......
...@@ -29,13 +29,13 @@ namespace ft = turbomind; ...@@ -29,13 +29,13 @@ namespace ft = turbomind;
template<typename T> template<typename T>
struct LlamaTritonSharedModelInstance { struct LlamaTritonSharedModelInstance {
std::unique_ptr<ft::LlamaV2<T>> llm;
std::shared_ptr<ft::LlamaWeight<T>> llm_weight;
std::unique_ptr<ft::Allocator<ft::AllocatorType::CUDA>> allocator; std::unique_ptr<ft::Allocator<ft::AllocatorType::CUDA>> allocator;
std::unique_ptr<ft::cublasAlgoMap> cublas_algo_map; std::unique_ptr<ft::cublasAlgoMap> cublas_algo_map;
std::unique_ptr<std::mutex> cublas_wrapper_mutex; std::unique_ptr<std::mutex> cublas_wrapper_mutex;
std::unique_ptr<ft::cublasMMWrapper> cublas_wrapper; std::unique_ptr<ft::cublasMMWrapper> cublas_wrapper;
std::unique_ptr<cudaDeviceProp> cuda_device_prop_ptr; std::unique_ptr<cudaDeviceProp> cuda_device_prop_ptr;
std::shared_ptr<ft::LlamaWeight<T>> llm_weight;
std::unique_ptr<ft::LlamaV2<T>> llm;
const int session_len; const int session_len;
}; };
......
...@@ -271,6 +271,7 @@ struct AbstractTransformerModel; ...@@ -271,6 +271,7 @@ struct AbstractTransformerModel;
struct AbstractTransformerModelInstance; struct AbstractTransformerModelInstance;
struct AbstractTransformerModelInstance { struct AbstractTransformerModelInstance {
virtual ~AbstractTransformerModelInstance() = default;
virtual std::shared_ptr<std::vector<triton::Tensor>> virtual std::shared_ptr<std::vector<triton::Tensor>>
forward(std::shared_ptr<std::vector<triton::Tensor>> input_tensors) = 0; forward(std::shared_ptr<std::vector<triton::Tensor>> input_tensors) = 0;
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment