Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
20cfcdec
"vllm/vscode:/vscode.git/clone" did not exist on "c83c4ff815f57f57194b99828368f5785ca4e1cc"
Unverified
Commit
20cfcdec
authored
May 08, 2024
by
youkaichao
Committed by
GitHub
May 08, 2024
Browse files
[Core][Optimization] change python dict to pytorch tensor for blocks to swap (#4659)
parent
ad932a22
Changes
21
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
20 additions
and
7 deletions
+20
-7
vllm/worker/worker.py
vllm/worker/worker.py
+20
-7
No files found.
vllm/worker/worker.py
View file @
20cfcdec
...
...
@@ -195,15 +195,14 @@ class Worker(WorkerBase):
def
cache_swap
(
self
,
blocks_to_swap_in
:
Dict
[
int
,
int
]
,
blocks_to_swap_out
:
Dict
[
int
,
int
]
,
blocks_to_swap_in
:
torch
.
Tensor
,
blocks_to_swap_out
:
torch
.
Tensor
,
blocks_to_copy
:
torch
.
Tensor
,
)
->
None
:
# Issue cache operations.
# TODO(woosuk): Profile swapping overhead and optimize if needed.
if
blocks_to_swap_in
:
if
blocks_to_swap_in
.
numel
()
>
0
:
self
.
cache_engine
.
swap_in
(
blocks_to_swap_in
)
if
blocks_to_swap_out
:
if
blocks_to_swap_out
.
numel
()
>
0
:
self
.
cache_engine
.
swap_out
(
blocks_to_swap_out
)
if
blocks_to_copy
.
numel
()
>
0
:
self
.
cache_engine
.
copy
(
blocks_to_copy
)
...
...
@@ -219,12 +218,26 @@ class Worker(WorkerBase):
else
:
seq_group_metadata_list
=
execute_model_req
.
seq_group_metadata_list
blocks_to_swap_in
:
torch
.
Tensor
blocks_to_swap_out
:
torch
.
Tensor
blocks_to_copy
:
torch
.
Tensor
if
self
.
is_driver_worker
:
assert
seq_group_metadata_list
is
not
None
assert
execute_model_req
is
not
None
num_seq_groups
=
len
(
seq_group_metadata_list
)
blocks_to_swap_in
=
execute_model_req
.
blocks_to_swap_in
blocks_to_swap_out
=
execute_model_req
.
blocks_to_swap_out
# `blocks_to_swap_in` and `blocks_to_swap_out` are CPU tensors.
# They contain the parameters used to launch cudaMemcpyAsync.
blocks_to_swap_in
=
torch
.
tensor
(
execute_model_req
.
blocks_to_swap_in
,
device
=
"cpu"
,
dtype
=
torch
.
int64
).
view
(
-
1
,
2
)
blocks_to_swap_out
=
torch
.
tensor
(
execute_model_req
.
blocks_to_swap_out
,
device
=
"cpu"
,
dtype
=
torch
.
int64
).
view
(
-
1
,
2
)
# `blocks_to_copy` is a GPU tensor. The source and target of the
# blocks to copy are on the same device, so `blocks_to_copy`
# can be used directly within CUDA kernels.
blocks_to_copy
=
torch
.
tensor
(
execute_model_req
.
blocks_to_copy
,
device
=
self
.
device
,
dtype
=
torch
.
int64
).
view
(
-
1
,
2
)
...
...
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment