Unverified commit 20cfcdec, authored by youkaichao, committed by GitHub
Browse files

[Core][Optimization] change python dict to pytorch tensor for blocks to swap (#4659)

parent ad932a22
...@@ -195,15 +195,14 @@ class Worker(WorkerBase): ...@@ -195,15 +195,14 @@ class Worker(WorkerBase):
def cache_swap( def cache_swap(
self, self,
blocks_to_swap_in: Dict[int, int], blocks_to_swap_in: torch.Tensor,
blocks_to_swap_out: Dict[int, int], blocks_to_swap_out: torch.Tensor,
blocks_to_copy: torch.Tensor, blocks_to_copy: torch.Tensor,
) -> None: ) -> None:
# Issue cache operations. # Issue cache operations.
# TODO(woosuk): Profile swapping overhead and optimize if needed. if blocks_to_swap_in.numel() > 0:
if blocks_to_swap_in:
self.cache_engine.swap_in(blocks_to_swap_in) self.cache_engine.swap_in(blocks_to_swap_in)
if blocks_to_swap_out: if blocks_to_swap_out.numel() > 0:
self.cache_engine.swap_out(blocks_to_swap_out) self.cache_engine.swap_out(blocks_to_swap_out)
if blocks_to_copy.numel() > 0: if blocks_to_copy.numel() > 0:
self.cache_engine.copy(blocks_to_copy) self.cache_engine.copy(blocks_to_copy)
...@@ -219,12 +218,26 @@ class Worker(WorkerBase): ...@@ -219,12 +218,26 @@ class Worker(WorkerBase):
else: else:
seq_group_metadata_list = execute_model_req.seq_group_metadata_list seq_group_metadata_list = execute_model_req.seq_group_metadata_list
blocks_to_swap_in: torch.Tensor
blocks_to_swap_out: torch.Tensor
blocks_to_copy: torch.Tensor
if self.is_driver_worker: if self.is_driver_worker:
assert seq_group_metadata_list is not None assert seq_group_metadata_list is not None
assert execute_model_req is not None assert execute_model_req is not None
num_seq_groups = len(seq_group_metadata_list) num_seq_groups = len(seq_group_metadata_list)
blocks_to_swap_in = execute_model_req.blocks_to_swap_in # `blocks_to_swap_in` and `blocks_to_swap_out` are cpu tensors.
blocks_to_swap_out = execute_model_req.blocks_to_swap_out # they contain parameters to launch cudamemcpyasync.
blocks_to_swap_in = torch.tensor(
execute_model_req.blocks_to_swap_in,
device="cpu",
dtype=torch.int64).view(-1, 2)
blocks_to_swap_out = torch.tensor(
execute_model_req.blocks_to_swap_out,
device="cpu",
dtype=torch.int64).view(-1, 2)
# `blocks_to_copy` is a gpu tensor. The src and tgt of
# blocks to copy are in the same device, and `blocks_to_copy`
# can be used directly within cuda kernels.
blocks_to_copy = torch.tensor(execute_model_req.blocks_to_copy, blocks_to_copy = torch.tensor(execute_model_req.blocks_to_copy,
device=self.device, device=self.device,
dtype=torch.int64).view(-1, 2) dtype=torch.int64).view(-1, 2)
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment