Unverified commit ae7428a8, authored by huangtingwei and committed by GitHub
Browse files

fix mooncake store mla zero copy meta (#9678)

parent a3aee7c3
...@@ -705,7 +705,6 @@ class MLATokenToKVPoolHost(HostKVCache): ...@@ -705,7 +705,6 @@ class MLATokenToKVPoolHost(HostKVCache):
raise ValueError(f"Unsupported layout: {self.layout}") raise ValueError(f"Unsupported layout: {self.layout}")
def get_buffer_meta(self, keys, indices): def get_buffer_meta(self, keys, indices):
local_rank = get_tensor_model_parallel_rank()
ptr_list = [] ptr_list = []
key_list = [] key_list = []
kv_buffer_data_ptr = self.kv_buffer.data_ptr() kv_buffer_data_ptr = self.kv_buffer.data_ptr()
...@@ -719,7 +718,7 @@ class MLATokenToKVPoolHost(HostKVCache): ...@@ -719,7 +718,7 @@ class MLATokenToKVPoolHost(HostKVCache):
) )
ptr_list.append(k_ptr) ptr_list.append(k_ptr)
key_ = keys[index // self.page_size] key_ = keys[index // self.page_size]
key_list.append(f"{key_}_{local_rank}_k") key_list.append(f"{key_}_k")
element_size = ( element_size = (
self.layer_num self.layer_num
* self.dtype.itemsize * self.dtype.itemsize
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment