Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
e441bad6
Unverified
Commit
e441bad6
authored
Jun 17, 2024
by
youkaichao
Committed by
GitHub
Jun 17, 2024
Browse files
[Optimization] use a pool to reuse LogicalTokenBlock.token_ids (#5584)
parent
1b44aaf4
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
39 additions
and
2 deletions
+39
-2
vllm/block.py
vllm/block.py
+39
-2
No files found.
vllm/block.py
View file @
e441bad6
"""Token blocks."""
from
typing
import
List
import
weakref
from
collections
import
defaultdict
from
typing
import
Dict
,
List
from
vllm.utils
import
Device
...
...
@@ -7,6 +9,35 @@ _BLANK_TOKEN_ID = -1
DEFAULT_LAST_ACCESSED_TIME
=
-
1
# Type alias: the token ids held by one block, stored as a plain list of ints.
TokensBlock
=
List
[
int
]
class BlockPool:
    """A pool of reusable token-id lists, bucketed by block size.

    Logical blocks are created in bulk when requests arrive and destroyed in
    bulk when requests finish. Building and discarding the `token_ids` list
    of every block is comparatively expensive, so released lists are kept
    here and handed back out to later allocations of the same size.

    A recycled list is returned exactly as it was released; its previous
    contents are not reset to the blank token id.
    """

    def __init__(self) -> None:
        # Maps a block size to the stack of free token lists of that length.
        self.pool: Dict[int, List[TokensBlock]] = defaultdict(list)

    def alloc_block(self, block_size: int) -> TokensBlock:
        """Return a token list of length `block_size`.

        A previously released list of that size is reused when one is
        available; otherwise a new list filled with `_BLANK_TOKEN_ID` is
        created.
        """
        # `.get` does not trigger the defaultdict factory, so probing for a
        # size that was never released leaves the mapping untouched.
        free_blocks = self.pool.get(block_size)
        if not free_blocks:
            return [_BLANK_TOKEN_ID] * block_size
        return free_blocks.pop()

    def del_block(self, block: TokensBlock) -> None:
        """Hand `block` back to the pool, filed under its current length."""
        size = len(block)
        self.pool[size].append(block)
# Module-level BlockPool instance; LogicalTokenBlock.__init__ allocates its
# token_ids from it and registers a finalizer that returns them to it.
_BLOCK_POOL
=
BlockPool
()
class
LogicalTokenBlock
:
"""A block that stores a contiguous chunk of tokens from left to right.
...
...
@@ -23,7 +54,13 @@ class LogicalTokenBlock:
self
.
block_number
=
block_number
self
.
block_size
=
block_size
self
.
token_ids
=
[
_BLANK_TOKEN_ID
]
*
block_size
self
.
token_ids
=
_BLOCK_POOL
.
alloc_block
(
block_size
)
# this finalizer is used to return the block to the pool when the object is deleted # noqa
# NOTE: don't use __del__ because it cannot guarantee the order of finalization, # noqa
# i.e. `self.token_ids` may be deleted before `self`, and we lose
# the opportunity to return the block to the pool
self
.
_finalizer
=
weakref
.
finalize
(
self
,
_BLOCK_POOL
.
del_block
,
self
.
token_ids
)
self
.
num_tokens
=
0
def
is_empty
(
self
)
->
bool
:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment