"""Token blocks."""

from typing import List

from cacheflow.utils import Device

# Placeholder stored in slots of a logical block that hold no token yet.
_BLANK_TOKEN_ID = -1


class LogicalTokenBlock:
    """A block that stores a contiguous chunk of tokens from left to right.

    Logical blocks are used to represent the states of the corresponding
    physical blocks in the KV cache.

    Attributes are plain and public:
      block_number: identifier given at construction time.
      block_size: total number of token slots in the block.
      token_ids: fixed-length list of `block_size` slots; unfilled slots
        hold `_BLANK_TOKEN_ID`.
      num_tokens: number of slots, counted from the left, that are filled.
    """

    def __init__(
        self,
        block_number: int,
        block_size: int,
    ) -> None:
        """Create an empty block with `block_size` blank slots."""
        self.block_number = block_number
        self.block_size = block_size

        # All slots start blank; only the first `num_tokens` entries are
        # ever meaningful.
        self.token_ids: List[int] = [_BLANK_TOKEN_ID] * block_size
        self.num_tokens = 0

    def is_empty(self) -> bool:
        """Return True if no token has been appended yet."""
        return self.num_tokens == 0

    def get_num_empty_slots(self) -> int:
        """Return how many more tokens can still be appended."""
        return self.block_size - self.num_tokens

    def is_full(self) -> bool:
        """Return True if every slot holds a token."""
        return self.num_tokens == self.block_size

    def append_tokens(self, token_ids: List[int]) -> None:
        """Write `token_ids` into the next free slots, left to right.

        Raises:
            AssertionError: if `token_ids` does not fit in the remaining
                empty slots.
        """
        num_new = len(token_ids)
        assert num_new <= self.get_num_empty_slots(), (
            f'Cannot append {num_new} tokens: only '
            f'{self.get_num_empty_slots()} empty slots left.')
        # The target slice has exactly `num_new` elements, so the
        # assignment overwrites blanks in place and the list length stays
        # equal to `block_size`.
        start = self.num_tokens
        self.token_ids[start:start + num_new] = token_ids
        self.num_tokens += num_new

    def get_token_ids(self) -> List[int]:
        """Return a new list with the filled slots only (no blanks)."""
        return self.token_ids[:self.num_tokens]

    def get_last_token_id(self) -> int:
        """Return the most recently appended token id.

        Raises:
            AssertionError: if the block is empty.
        """
        assert self.num_tokens > 0, 'The block has no tokens.'
        return self.token_ids[self.num_tokens - 1]


class PhysicalTokenBlock:
    """Represents the state of a block in the KV cache."""

    def __init__(
        self,
        device: Device,
        block_number: int,
        block_size: int,
    ) -> None:
        """Store the block's device, number and size.

        The reference count starts at zero.
        """
        self.device = device
        self.block_number = block_number
        self.block_size = block_size

        # Never modified inside this class; presumably maintained by
        # whatever code hands out and shares blocks -- confirm at callers.
        self.ref_count = 0

    def __repr__(self) -> str:
        """Return a debug string with device, block number and ref count."""
        return (f'PhysicalTokenBlock(device={self.device}, '
                f'block_number={self.block_number}, '
                f'ref_count={self.ref_count})')