Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
ModelZoo
Qwen_lmdeploy
Commits
5d87c20f
Unverified
Commit
5d87c20f
authored
Sep 26, 2023
by
Lyu Han
Committed by
GitHub
Sep 26, 2023
Browse files
Fix memory leak (#488)
* Fix memory leak * modern c++
parent
97dcdff7
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
10 additions
and
5 deletions
+10
-5
src/turbomind/models/llama/LlamaWeight.cc
src/turbomind/models/llama/LlamaWeight.cc
+4
-0
src/turbomind/triton_backend/llama/LlamaTritonModel.cc
src/turbomind/triton_backend/llama/LlamaTritonModel.cc
+3
-3
src/turbomind/triton_backend/llama/LlamaTritonModelInstance.h
...turbomind/triton_backend/llama/LlamaTritonModelInstance.h
+2
-2
src/turbomind/triton_backend/transformer_triton_backend.hpp
src/turbomind/triton_backend/transformer_triton_backend.hpp
+1
-0
No files found.
src/turbomind/models/llama/LlamaWeight.cc
View file @
5d87c20f
...
@@ -72,6 +72,10 @@ LlamaWeight<T>::~LlamaWeight()
...
@@ -72,6 +72,10 @@ LlamaWeight<T>::~LlamaWeight()
pre_decoder_embedding_table
=
nullptr
;
pre_decoder_embedding_table
=
nullptr
;
post_decoder_embedding_kernel
=
nullptr
;
post_decoder_embedding_kernel
=
nullptr
;
for
(
auto
&
p
:
decoder_layer_weights
)
{
delete
p
;
}
}
}
template
<
typename
T
>
template
<
typename
T
>
...
...
src/turbomind/triton_backend/llama/LlamaTritonModel.cc
View file @
5d87c20f
...
@@ -249,13 +249,13 @@ std::unique_ptr<LlamaTritonSharedModelInstance<T>> LlamaTritonModel<T>::createSh
...
@@ -249,13 +249,13 @@ std::unique_ptr<LlamaTritonSharedModelInstance<T>> LlamaTritonModel<T>::createSh
cuda_device_prop_ptr
.
get
());
cuda_device_prop_ptr
.
get
());
return
std
::
make_unique
<
LlamaTritonSharedModelInstance
<
T
>>
(
return
std
::
make_unique
<
LlamaTritonSharedModelInstance
<
T
>>
(
LlamaTritonSharedModelInstance
<
T
>
{
std
::
move
(
llama
),
LlamaTritonSharedModelInstance
<
T
>
{
std
::
move
(
allocator
),
shared_weights_
[
device_id
],
std
::
move
(
allocator
),
std
::
move
(
cublas_algo_map
),
std
::
move
(
cublas_algo_map
),
std
::
move
(
cublas_wrapper_mutex
),
std
::
move
(
cublas_wrapper_mutex
),
std
::
move
(
cublas_wrapper
),
std
::
move
(
cublas_wrapper
),
std
::
move
(
cuda_device_prop_ptr
),
std
::
move
(
cuda_device_prop_ptr
),
shared_weights_
[
device_id
],
std
::
move
(
llama
),
session_len_
});
session_len_
});
}
}
...
...
src/turbomind/triton_backend/llama/LlamaTritonModelInstance.h
View file @
5d87c20f
...
@@ -29,13 +29,13 @@ namespace ft = turbomind;
...
@@ -29,13 +29,13 @@ namespace ft = turbomind;
template
<
typename
T
>
template
<
typename
T
>
struct
LlamaTritonSharedModelInstance
{
struct
LlamaTritonSharedModelInstance
{
std
::
unique_ptr
<
ft
::
LlamaV2
<
T
>>
llm
;
std
::
shared_ptr
<
ft
::
LlamaWeight
<
T
>>
llm_weight
;
std
::
unique_ptr
<
ft
::
Allocator
<
ft
::
AllocatorType
::
CUDA
>>
allocator
;
std
::
unique_ptr
<
ft
::
Allocator
<
ft
::
AllocatorType
::
CUDA
>>
allocator
;
std
::
unique_ptr
<
ft
::
cublasAlgoMap
>
cublas_algo_map
;
std
::
unique_ptr
<
ft
::
cublasAlgoMap
>
cublas_algo_map
;
std
::
unique_ptr
<
std
::
mutex
>
cublas_wrapper_mutex
;
std
::
unique_ptr
<
std
::
mutex
>
cublas_wrapper_mutex
;
std
::
unique_ptr
<
ft
::
cublasMMWrapper
>
cublas_wrapper
;
std
::
unique_ptr
<
ft
::
cublasMMWrapper
>
cublas_wrapper
;
std
::
unique_ptr
<
cudaDeviceProp
>
cuda_device_prop_ptr
;
std
::
unique_ptr
<
cudaDeviceProp
>
cuda_device_prop_ptr
;
std
::
shared_ptr
<
ft
::
LlamaWeight
<
T
>>
llm_weight
;
std
::
unique_ptr
<
ft
::
LlamaV2
<
T
>>
llm
;
const
int
session_len
;
const
int
session_len
;
};
};
...
...
src/turbomind/triton_backend/transformer_triton_backend.hpp
View file @
5d87c20f
...
@@ -271,6 +271,7 @@ struct AbstractTransformerModel;
...
@@ -271,6 +271,7 @@ struct AbstractTransformerModel;
struct
AbstractTransformerModelInstance
;
struct
AbstractTransformerModelInstance
;
struct
AbstractTransformerModelInstance
{
struct
AbstractTransformerModelInstance
{
virtual
~
AbstractTransformerModelInstance
()
=
default
;
virtual
std
::
shared_ptr
<
std
::
vector
<
triton
::
Tensor
>>
virtual
std
::
shared_ptr
<
std
::
vector
<
triton
::
Tensor
>>
forward
(
std
::
shared_ptr
<
std
::
vector
<
triton
::
Tensor
>>
input_tensors
)
=
0
;
forward
(
std
::
shared_ptr
<
std
::
vector
<
triton
::
Tensor
>>
input_tensors
)
=
0
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment