"vllm/vscode:/vscode.git/clone" did not exist on "c83c4ff815f57f57194b99828368f5785ca4e1cc"
Unverified commit 20cfcdec, authored by youkaichao and committed by GitHub
Browse files

[Core][Optimization] change python dict to pytorch tensor for blocks to swap (#4659)

parent ad932a22
......@@ -195,15 +195,14 @@ class Worker(WorkerBase):
def cache_swap(
    self,
    blocks_to_swap_in: torch.Tensor,
    blocks_to_swap_out: torch.Tensor,
    blocks_to_copy: torch.Tensor,
) -> None:
    """Forward pending block operations to ``self.cache_engine``.

    Each operation is issued only when its tensor holds at least one
    element, so empty tensors cost nothing.

    Args:
        blocks_to_swap_in: Passed to ``cache_engine.swap_in``.
        blocks_to_swap_out: Passed to ``cache_engine.swap_out``.
        blocks_to_copy: Passed to ``cache_engine.copy``.

    NOTE(review): the caller visible in this file builds all three as
    int64 tensors viewed as ``(-1, 2)``, with the swap tensors on CPU and
    ``blocks_to_copy`` on the worker's device -- confirm for other callers.
    """
    # Issue cache operations.
    # TODO(woosuk): Profile swapping overhead and optimize if needed.
    # Emptiness is tested with ``numel()`` rather than truthiness: calling
    # ``bool()`` on a tensor with more than one element raises.
    if blocks_to_swap_in.numel() > 0:
        self.cache_engine.swap_in(blocks_to_swap_in)
    if blocks_to_swap_out.numel() > 0:
        self.cache_engine.swap_out(blocks_to_swap_out)
    if blocks_to_copy.numel() > 0:
        self.cache_engine.copy(blocks_to_copy)
......@@ -219,12 +218,26 @@ class Worker(WorkerBase):
else:
seq_group_metadata_list = execute_model_req.seq_group_metadata_list
blocks_to_swap_in: torch.Tensor
blocks_to_swap_out: torch.Tensor
blocks_to_copy: torch.Tensor
if self.is_driver_worker:
assert seq_group_metadata_list is not None
assert execute_model_req is not None
num_seq_groups = len(seq_group_metadata_list)
blocks_to_swap_in = execute_model_req.blocks_to_swap_in
blocks_to_swap_out = execute_model_req.blocks_to_swap_out
# `blocks_to_swap_in` and `blocks_to_swap_out` are cpu tensors.
# they contain parameters to launch cudamemcpyasync.
blocks_to_swap_in = torch.tensor(
execute_model_req.blocks_to_swap_in,
device="cpu",
dtype=torch.int64).view(-1, 2)
blocks_to_swap_out = torch.tensor(
execute_model_req.blocks_to_swap_out,
device="cpu",
dtype=torch.int64).view(-1, 2)
# `blocks_to_copy` is a gpu tensor. The src and tgt of
# blocks to copy are in the same device, and `blocks_to_copy`
# can be used directly within cuda kernels.
blocks_to_copy = torch.tensor(execute_model_req.blocks_to_copy,
device=self.device,
dtype=torch.int64).view(-1, 2)
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment