Unverified commit a01ddd96, authored by Liangsheng Yin, committed by GitHub
Browse files

misc: fix the req_to_token member change (#967)

parent 7fa54a1a
...@@ -289,10 +289,10 @@ class ModelTpServer: ...@@ -289,10 +289,10 @@ class ModelTpServer:
"KV cache pool leak detected!" "KV cache pool leak detected!"
) )
if self.req_to_token_pool.can_use_mem_size != self.req_to_token_pool.size: if len(self.req_to_token_pool.free_slots) != self.req_to_token_pool.size:
warnings.warn( warnings.warn(
"Warning: " "Warning: "
f"available req slots={self.req_to_token_pool.can_use_mem_size}, " f"available req slots={len(self.req_to_token_pool.free_slots)}, "
f"total slots={self.req_to_token_pool.size}\n" f"total slots={self.req_to_token_pool.size}\n"
"Memory pool leak detected!" "Memory pool leak detected!"
) )
......
...@@ -32,7 +32,6 @@ class ReqToTokenPool: ...@@ -32,7 +32,6 @@ class ReqToTokenPool:
self.req_to_token = torch.empty( self.req_to_token = torch.empty(
(size, max_context_len), dtype=torch.int32, device="cuda" (size, max_context_len), dtype=torch.int32, device="cuda"
) )
self.can_use_mem_size = size
def alloc(self, need_size: int) -> List[int]: def alloc(self, need_size: int) -> List[int]:
if need_size > len(self.free_slots): if need_size > len(self.free_slots):
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment