Unverified Commit 88693683 authored by Sungjae Lee's avatar Sungjae Lee Committed by GitHub
Browse files

[Performance][Core] Optimize the performance of evictor v1 and v2 by applying...

[Performance][Core] Optimize the performance of evictor v1 and v2 by applying a priority queue and lazy deletion (#7209)
parent 6d917d0e
import enum import enum
import heapq
from abc import ABC, abstractmethod from abc import ABC, abstractmethod
from typing import OrderedDict, Tuple from typing import Dict, List, Tuple
class EvictionPolicy(enum.Enum): class EvictionPolicy(enum.Enum):
...@@ -75,8 +76,14 @@ class LRUEvictor(Evictor): ...@@ -75,8 +76,14 @@ class LRUEvictor(Evictor):
highest num_hashed_tokens value, then one will be chosen arbitrarily highest num_hashed_tokens value, then one will be chosen arbitrarily
""" """
# CLEANUP_THRESHOLD determines the maximum allowable size of the priority
# queue relative to the free table size. When this threshold is exceeded,
# a cleanup operation is triggered to reduce memory usage.
CLEANUP_THRESHOLD = 50
def __init__(self):
    """Create an empty evictor.

    ``free_table`` maps a block id to the metadata of a block that is
    currently eligible for eviction.  ``priority_queue`` is a ``heapq``
    min-heap of ``(last_accessed, -num_hashed_tokens, block_id,
    content_hash)`` tuples; it may contain outdated entries, which are
    filtered out lazily at eviction time.
    """
    self.free_table: Dict[int, BlockMetaData] = {}
    # Annotated explicitly: an empty list literal gives type checkers
    # nothing to infer the element type from.
    self.priority_queue: List[Tuple[float, int, int, int]] = []
def __contains__(self, block_id: int) -> bool:
    """Return True when ``block_id`` is present in the free table."""
    tracked_blocks = self.free_table
    return block_id in tracked_blocks
...@@ -85,34 +92,50 @@ class LRUEvictor(Evictor): ...@@ -85,34 +92,50 @@ class LRUEvictor(Evictor):
if len(self.free_table) == 0: if len(self.free_table) == 0:
raise ValueError("No usable cache memory left") raise ValueError("No usable cache memory left")
evicted_block, evicted_block_id = None, None while self.priority_queue:
# The blocks with the lowest timestamps should be placed consecutively # We do not remove outdated entries from the priority queue at the
# at the start of OrderedDict. Loop through all these blocks to # time of updating the last_accessed timestamp. Instead, outdated
# find the one with maximum number of hashed tokens. # entries are filtered out here during eviction. Outdated entries
for _id, block in self.free_table.items(): # would either not be in the free table, or have older last accessed
if evicted_block is None: # time.
evicted_block, evicted_block_id = block, _id last_accessed, _, block_id, content_hash = heapq.heappop(
continue self.priority_queue)
if evicted_block.last_accessed < block.last_accessed: if (block_id in self.free_table and
break self.free_table[block_id].last_accessed == last_accessed):
if evicted_block.num_hashed_tokens < block.num_hashed_tokens: self.free_table.pop(block_id)
evicted_block, evicted_block_id = block, _id return block_id, content_hash
assert evicted_block is not None raise ValueError("No usable cache memory left")
assert evicted_block_id is not None
self.free_table.pop(evicted_block_id)
return evicted_block_id, evicted_block.content_hash
def add(self, block_id: int, content_hash: int, num_hashed_tokens: int,
        last_accessed: float):
    """Register a block as evictable.

    Stores the block's metadata in the free table under ``block_id`` and
    pushes a matching entry onto the priority queue.  The token count is
    negated in the heap entry so that, among entries with the same
    ``last_accessed``, the one with more hashed tokens is popped first.
    Finally gives the queue a chance to shed outdated entries.
    """
    metadata = BlockMetaData(content_hash, num_hashed_tokens, last_accessed)
    self.free_table[block_id] = metadata

    heap_entry = (last_accessed, -num_hashed_tokens, block_id, content_hash)
    heapq.heappush(self.priority_queue, heap_entry)

    self._cleanup_if_necessary()
def update(self, block_id: int, last_accessed: float):
    """Refresh the last-accessed timestamp of a block in the free table.

    Eviction discards every heap entry whose timestamp no longer matches
    the one stored in the free table.  Changing the timestamp alone would
    therefore leave the block without any valid heap entry, making it
    impossible to evict until the queue happens to be rebuilt.  To keep
    the block evictable, a fresh entry carrying the new timestamp is
    pushed; the previous entry stays behind as an outdated one and is
    dropped lazily.

    Args:
        block_id: id of a block currently present in the free table.
        last_accessed: the new access timestamp.

    Raises:
        KeyError: if ``block_id`` is not in the free table.
    """
    block = self.free_table[block_id]
    block.last_accessed = last_accessed
    heapq.heappush(
        self.priority_queue,
        (last_accessed, -block.num_hashed_tokens, block_id,
         block.content_hash))
    # Each update leaves one outdated entry behind, so bound the growth.
    self._cleanup_if_necessary()
def _cleanup_if_necessary(self):
    """Rebuild the priority queue when it has grown too large.

    The queue is rebuilt once its length exceeds ``CLEANUP_THRESHOLD``
    times the number of blocks in the free table.  The threshold is read
    through ``self`` so that a subclass or instance override is honoured;
    without an override this resolves to the class-level constant.
    """
    if len(self.priority_queue) > self.CLEANUP_THRESHOLD * len(
            self.free_table):
        self._cleanup()
def _cleanup(self):
    """Replace the priority queue with one built from the free table.

    Creates exactly one heap entry per block in the free table, using the
    block's current ``last_accessed``, negated ``num_hashed_tokens``, id
    and ``content_hash``.  Every outdated entry in the old queue is
    thereby dropped.
    """
    new_priority_queue: List[Tuple[float, int, int, int]] = [
        (block.last_accessed, -block.num_hashed_tokens, block_id,
         block.content_hash)
        for block_id, block in self.free_table.items()
    ]
    # Restore the heap invariant before the list is used with heapq again.
    heapq.heapify(new_priority_queue)
    self.priority_queue = new_priority_queue
def remove(self, block_id: int): def remove(self, block_id: int):
if block_id not in self.free_table: if block_id not in self.free_table:
raise ValueError( raise ValueError(
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment