"vscode:/vscode.git/clone" did not exist on "371e3f397a45914afc83b79403cabe1d9a41b931"
Unverified Commit cee9f329 authored by Ying Sheng's avatar Ying Sheng Committed by GitHub
Browse files

[minor fix] llama4 hybrid memory (#7950)

parent eb118d88
...@@ -520,8 +520,13 @@ class SWAKVPool(KVCache): ...@@ -520,8 +520,13 @@ class SWAKVPool(KVCache):
self.layers_mapping[global_layer_id] = (swa_layer_id, True) self.layers_mapping[global_layer_id] = (swa_layer_id, True)
self.full_to_swa_index_mapping: Optional[torch.Tensor] = None self.full_to_swa_index_mapping: Optional[torch.Tensor] = None
k_size, v_size = self.get_kv_size_bytes()
self.mem_usage = (k_size + v_size) / GB
def get_kv_size_bytes(self): def get_kv_size_bytes(self):
raise NotImplementedError k_size, v_size = self.full_kv_pool.get_kv_size_bytes()
k_size_swa, v_size_swa = self.swa_kv_pool.get_kv_size_bytes()
return k_size + k_size_swa, v_size + v_size_swa
def get_contiguous_buf_infos(self): def get_contiguous_buf_infos(self):
full_kv_data_ptrs, full_kv_data_lens, full_kv_item_lens = ( full_kv_data_ptrs, full_kv_data_lens, full_kv_item_lens = (
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment