Unverified commit 410225b7, authored by sjtu_shenhai and committed by GitHub
Browse files

[Bug fix] Fix severe memory waste issue with torch.empty pin_memory (#12266)

parent 2c9aebea
...@@ -238,12 +238,16 @@ class MHATokenToKVPoolHost(HostKVCache): ...@@ -238,12 +238,16 @@ class MHATokenToKVPoolHost(HostKVCache):
raise ValueError(f"Unsupported layout: {self.layout}") raise ValueError(f"Unsupported layout: {self.layout}")
self.token_stride_size = self.head_num * self.head_dim * self.dtype.itemsize self.token_stride_size = self.head_num * self.head_dim * self.dtype.itemsize
self.layout_dim = self.token_stride_size * self.layer_num self.layout_dim = self.token_stride_size * self.layer_num
return torch.empty( buffer = torch.empty(
dims, dims,
dtype=self.dtype, dtype=self.dtype,
device=self.device, device=self.device,
pin_memory=self.pin_memory,
) )
if self.pin_memory:
torch.cuda.cudart().cudaHostRegister(
buffer.data_ptr(), buffer.numel() * buffer.element_size(), 0
)
return buffer
@property @property
def k_buffer(self): def k_buffer(self):
...@@ -551,13 +555,16 @@ class MLATokenToKVPoolHost(HostKVCache): ...@@ -551,13 +555,16 @@ class MLATokenToKVPoolHost(HostKVCache):
self.kv_lora_rank + self.qk_rope_head_dim self.kv_lora_rank + self.qk_rope_head_dim
) * self.dtype.itemsize ) * self.dtype.itemsize
self.layout_dim = self.token_stride_size * self.layer_num self.layout_dim = self.token_stride_size * self.layer_num
buffer = torch.empty(
return torch.empty(
dims, dims,
dtype=self.dtype, dtype=self.dtype,
device=self.device, device=self.device,
pin_memory=self.pin_memory,
) )
if self.pin_memory:
torch.cuda.cudart().cudaHostRegister(
buffer.data_ptr(), buffer.numel() * buffer.element_size(), 0
)
return buffer
def load_to_device_per_layer( def load_to_device_per_layer(
self, device_pool, host_indices, device_indices, layer_id, io_backend self, device_pool, host_indices, device_indices, layer_id, io_backend
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment