evictor.py 5.28 KB
Newer Older
1
import enum
2
import heapq
3
from abc import ABC, abstractmethod
4
from typing import Dict, List, Tuple
5
6
7
8
9
10
11
12
13
14
15


class EvictionPolicy(enum.Enum):
    """Enum for eviction policy used by make_evictor to instantiate the correct
       Evictor subclass.
    """
    LRU = enum.auto()


class Evictor(ABC):
    """The Evictor subclasses should be used by the BlockAllocator class to
16
    handle eviction of freed Blocks.
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
    """

    @abstractmethod
    def __init__(self):
        pass

    @abstractmethod
    def __contains__(self, block_id: int) -> bool:
        pass

    @abstractmethod
    def evict(self) -> Tuple[int, int]:
        """Runs the eviction algorithm and returns the evicted block's
        content hash along with physical block id along with physical block id
        """
        pass

    @abstractmethod
    def add(self, block_id: int, content_hash: int, num_hashed_tokens: int,
36
            last_accessed: float):
37
38
39
40
        """Adds block to the evictor, making it a candidate for eviction"""
        pass

    @abstractmethod
41
    def update(self, block_id: int, last_accessed: float):
42
43
44
        """Update corresponding block's access time in metadata"""
        pass

45
46
47
48
49
    @abstractmethod
    def remove(self, block_id: int):
        """Remove a given block id from the cache."""
        pass

50
51
    @property
    @abstractmethod
52
53
54
55
    def num_blocks(self) -> int:
        pass


56
class BlockMetaData:
57
58
59
60
61
62
63
64
    """Data structure for storing key data describe cached block, so that
    evitor could use to make its decision which one to choose for eviction

    Here we use physical block id as the dict key, as there maybe several
    blocks with the same content hash, but their physical id is unique.
    """

    def __init__(self, content_hash: int, num_hashed_tokens: int,
65
                 last_accessed: float):
66
67
68
69
70
71
72
        self.content_hash = content_hash
        self.num_hashed_tokens = num_hashed_tokens
        self.last_accessed = last_accessed


class LRUEvictor(Evictor):
    """Evicts in a least-recently-used order using the last_accessed timestamp
73
    that's recorded in the Block. If there are multiple blocks with
74
75
76
77
78
    the same last_accessed time, then the one with the largest num_hashed_tokens
    will be evicted. If two blocks each have the lowest last_accessed time and
    highest num_hashed_tokens value, then one will be chose arbitrarily
    """

79
80
81
82
83
    # CLEANUP_THRESHOLD determines the maximum allowable size of the priority
    # queue relative to the free table size. When this threshold is exceeded,
    # a cleanup operation is triggered to reduce memory usage.
    CLEANUP_THRESHOLD = 50

84
    def __init__(self):
85
86
        self.free_table: Dict[int, BlockMetaData] = {}
        self.priority_queue = []
87
88
89
90
91
92
93
94

    def __contains__(self, block_id: int) -> bool:
        return block_id in self.free_table

    def evict(self) -> Tuple[int, int]:
        if len(self.free_table) == 0:
            raise ValueError("No usable cache memory left")

95
96
97
98
99
100
101
102
103
104
105
106
107
108
        while self.priority_queue:
            # We do not remove outdated entries from the priority queue at the
            # time of updating the last_accessed timestamp. Instead, outdated
            # entries are filtered out here during eviction. Outdated entries
            # would either not in the free table, or have older last accessed
            # time.
            last_accessed, _, block_id, content_hash = heapq.heappop(
                self.priority_queue)
            if (block_id in self.free_table and
                    self.free_table[block_id].last_accessed == last_accessed):
                self.free_table.pop(block_id)
                return block_id, content_hash

        raise ValueError("No usable cache memory left")
109
110

    def add(self, block_id: int, content_hash: int, num_hashed_tokens: int,
111
            last_accessed: float):
112
113
114
        self.free_table[block_id] = BlockMetaData(content_hash,
                                                  num_hashed_tokens,
                                                  last_accessed)
115
116
117
118
        heapq.heappush(
            self.priority_queue,
            (last_accessed, -num_hashed_tokens, block_id, content_hash))
        self._cleanup_if_necessary()
119

120
    def update(self, block_id: int, last_accessed: float):
121
122
        self.free_table[block_id].last_accessed = last_accessed

123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
    def _cleanup_if_necessary(self):
        if len(self.priority_queue) > LRUEvictor.CLEANUP_THRESHOLD * len(
                self.free_table):
            self._cleanup()

    def _cleanup(self):
        new_priority_queue: List[Tuple[float, int, int, int]] = []

        for block_id, block in self.free_table.items():
            new_priority_queue.append(
                (block.last_accessed, -block.num_hashed_tokens, block_id,
                 block.content_hash))
        heapq.heapify(new_priority_queue)

        self.priority_queue = new_priority_queue

139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
    def remove(self, block_id: int):
        if block_id not in self.free_table:
            raise ValueError(
                "Attempting to remove block that's not in the evictor")
        self.free_table.pop(block_id)

    @property
    def num_blocks(self) -> int:
        return len(self.free_table)


def make_evictor(eviction_policy: EvictionPolicy) -> Evictor:
    if eviction_policy == EvictionPolicy.LRU:
        return LRUEvictor()
    else:
        raise ValueError(f"Unknown cache eviction policy: {eviction_policy}")