Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
norm
vllm
Commits
64e0e383
"vscode:/vscode.git/clone" did not exist on "9efcac38af58b7247e205c47efe090b4c6ec7574"
Unverified
Commit
64e0e383
authored
Mar 29, 2023
by
Woosuk Kwon
Committed by
GitHub
Mar 29, 2023
Browse files
Add cache watermark to avoid frequent cache eviction (#11)
parent
721fa3df
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
8 additions
and
2 deletions
+8
-2
cacheflow/master/block_manager.py
cacheflow/master/block_manager.py
+8
-2
No files found.
cacheflow/master/block_manager.py
View file @
64e0e383
...
@@ -60,11 +60,15 @@ class BlockSpaceManager:
...
@@ -60,11 +60,15 @@ class BlockSpaceManager:
block_size
:
int
,
block_size
:
int
,
num_gpu_blocks
:
int
,
num_gpu_blocks
:
int
,
num_cpu_blocks
:
int
,
num_cpu_blocks
:
int
,
watermark
:
float
=
0.01
,
)
->
None
:
)
->
None
:
self
.
block_size
=
block_size
self
.
block_size
=
block_size
self
.
num_total_gpu_blocks
=
num_gpu_blocks
self
.
num_total_gpu_blocks
=
num_gpu_blocks
self
.
num_total_cpu_blocks
=
num_cpu_blocks
self
.
num_total_cpu_blocks
=
num_cpu_blocks
self
.
watermark
=
watermark
assert
watermark
>=
0.0
self
.
watermark_blocks
=
int
(
watermark
*
num_gpu_blocks
)
self
.
gpu_allocator
=
BlockAllocator
(
Device
.
GPU
,
block_size
,
num_gpu_blocks
)
self
.
gpu_allocator
=
BlockAllocator
(
Device
.
GPU
,
block_size
,
num_gpu_blocks
)
self
.
cpu_allocator
=
BlockAllocator
(
Device
.
CPU
,
block_size
,
num_cpu_blocks
)
self
.
cpu_allocator
=
BlockAllocator
(
Device
.
CPU
,
block_size
,
num_cpu_blocks
)
...
@@ -76,7 +80,8 @@ class BlockSpaceManager:
...
@@ -76,7 +80,8 @@ class BlockSpaceManager:
seq
=
seq_group
.
seqs
[
0
]
seq
=
seq_group
.
seqs
[
0
]
num_required_blocks
=
len
(
seq
.
logical_token_blocks
)
num_required_blocks
=
len
(
seq
.
logical_token_blocks
)
num_free_gpu_blocks
=
self
.
gpu_allocator
.
get_num_free_blocks
()
num_free_gpu_blocks
=
self
.
gpu_allocator
.
get_num_free_blocks
()
return
num_required_blocks
<=
num_free_gpu_blocks
# Use watermark to avoid frequent cache eviction.
return
num_free_gpu_blocks
-
num_required_blocks
>=
self
.
watermark_blocks
def
allocate
(
self
,
seq_group
:
SequenceGroup
)
->
None
:
def
allocate
(
self
,
seq_group
:
SequenceGroup
)
->
None
:
# NOTE: Here we assume that all sequences in the group have the same prompt.
# NOTE: Here we assume that all sequences in the group have the same prompt.
...
@@ -154,7 +159,8 @@ class BlockSpaceManager:
...
@@ -154,7 +159,8 @@ class BlockSpaceManager:
# NOTE: Conservatively, we assume that every sequence will allocate
# NOTE: Conservatively, we assume that every sequence will allocate
# at least one free block right after the swap-in.
# at least one free block right after the swap-in.
# NOTE: This should match the logic in can_append().
# NOTE: This should match the logic in can_append().
return
len
(
blocks
)
+
num_swapped_seqs
<=
num_free_blocks
num_required_blocks
=
len
(
blocks
)
+
num_swapped_seqs
return
num_free_blocks
-
num_required_blocks
>=
self
.
watermark_blocks
def
swap_in
(
self
,
seq_group
:
SequenceGroup
)
->
Dict
[
int
,
int
]:
def
swap_in
(
self
,
seq_group
:
SequenceGroup
)
->
Dict
[
int
,
int
]:
# CPU block -> GPU block.
# CPU block -> GPU block.
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment