Commit 6ebd02bd (unverified), authored by ElizaWszola, committed by GitHub
Browse files

[PREFIX CACHING FOLLOW UP] OrderedDict-based evictor (#3431)


Co-authored-by: rsnm2 <rshaw@neuralmagic.com>
Co-authored-by: Luka <luka@paperspace>
parent 523e30ea
import enum import enum
from typing import Dict from typing import OrderedDict
from abc import ABC, abstractmethod, abstractproperty from abc import ABC, abstractmethod, abstractproperty
from vllm.block import PhysicalTokenBlock from vllm.block import PhysicalTokenBlock
...@@ -58,27 +58,26 @@ class LRUEvictor(Evictor): ...@@ -58,27 +58,26 @@ class LRUEvictor(Evictor):
""" """
def __init__(self): def __init__(self):
self.free_table: Dict[int, PhysicalTokenBlock] = {} self.free_table: OrderedDict[int, PhysicalTokenBlock] = OrderedDict()
def __contains__(self, block_hash: int) -> bool: def __contains__(self, block_hash: int) -> bool:
return block_hash in self.free_table return block_hash in self.free_table
# TODO: The performance of this evict function can be optimized further.
def evict(self) -> PhysicalTokenBlock: def evict(self) -> PhysicalTokenBlock:
if len(self.free_table) == 0: if len(self.free_table) == 0:
raise ValueError("No usable cache memory left") raise ValueError("No usable cache memory left")
free_blocks = self.free_table.values()
# Get evicted block evicted_block = next(iter(self.free_table.values()))
evicted_block: PhysicalTokenBlock = next(iter(free_blocks)) # The blocks with the lowest timestamps should be placed consecutively
# at the start of OrderedDict. Loop through all these blocks to
for block in free_blocks: # find the one with maximum number of hashed tokens.
if (block.last_accessed < evicted_block.last_accessed for _, block in self.free_table.items():
or block.last_accessed == evicted_block.last_accessed and if evicted_block.last_accessed < block.last_accessed:
block.num_hashed_tokens > evicted_block.num_hashed_tokens): break
if evicted_block.num_hashed_tokens < block.num_hashed_tokens:
evicted_block = block evicted_block = block
del self.free_table[evicted_block.block_hash] self.free_table.pop(evicted_block.block_hash)
evicted_block.computed = False evicted_block.computed = False
return evicted_block return evicted_block
...@@ -91,7 +90,7 @@ class LRUEvictor(Evictor): ...@@ -91,7 +90,7 @@ class LRUEvictor(Evictor):
raise ValueError( raise ValueError(
"Attempting to remove block that's not in the evictor") "Attempting to remove block that's not in the evictor")
block: PhysicalTokenBlock = self.free_table[block_hash] block: PhysicalTokenBlock = self.free_table[block_hash]
del self.free_table[block_hash] self.free_table.pop(block_hash)
return block return block
@property @property
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment