block.py 1.9 KB
Newer Older
1
"""Token blocks."""
Woosuk Kwon's avatar
Woosuk Kwon committed
2
3
from typing import List

Woosuk Kwon's avatar
Woosuk Kwon committed
4
from vllm.utils import Device
Woosuk Kwon's avatar
Woosuk Kwon committed
5

6
# Placeholder value used to pre-fill every token slot of a LogicalTokenBlock
# before real token IDs are appended.
_BLANK_TOKEN_ID = -1
Woosuk Kwon's avatar
Woosuk Kwon committed
7
8
9


class LogicalTokenBlock:
    """A block that stores a contiguous chunk of tokens from left to right.

    Logical blocks are used to represent the states of the corresponding
    physical blocks in the KV cache.

    Slots are filled in order starting at index 0. ``num_tokens`` counts the
    filled slots; ``token_ids`` always has ``block_size`` entries as long as
    the block is only modified through ``append_tokens``.
    """

    def __init__(
        self,
        block_number: int,
        block_size: int,
    ) -> None:
        """Create an empty block.

        Args:
            block_number: Number stored on the block as ``block_number``.
            block_size: Number of token slots in the block.
        """
        self.block_number = block_number
        self.block_size = block_size

        # Every slot starts out blank; append_tokens() overwrites them in
        # order.
        self.token_ids = [_BLANK_TOKEN_ID] * block_size
        self.num_tokens = 0

    def is_empty(self) -> bool:
        """Return True if no token has been appended yet."""
        return self.num_tokens == 0

    def get_num_empty_slots(self) -> int:
        """Return how many more tokens the block can accept."""
        return self.block_size - self.num_tokens

    def is_full(self) -> bool:
        """Return True if every slot holds an appended token."""
        return self.num_tokens == self.block_size

    def append_tokens(self, token_ids: List[int]) -> None:
        """Write ``token_ids`` into the next free slots, in order.

        Args:
            token_ids: Token IDs to append; must fit in the empty slots.

        Raises:
            AssertionError: If ``token_ids`` has more entries than there are
                empty slots. The block is left unchanged in that case.
        """
        num_new_tokens = len(token_ids)
        num_empty_slots = self.get_num_empty_slots()
        # Raised explicitly instead of via ``assert`` so the check still runs
        # under ``python -O``. Without it, the slice assignment below would
        # silently grow ``self.token_ids`` past ``block_size``.
        if num_new_tokens > num_empty_slots:
            raise AssertionError(
                f'Cannot append {num_new_tokens} tokens to a block with '
                f'{num_empty_slots} empty slots.')

        curr_idx = self.num_tokens
        self.token_ids[curr_idx:curr_idx + num_new_tokens] = token_ids
        self.num_tokens += num_new_tokens

    def get_token_ids(self) -> List[int]:
        """Return a new list with the appended tokens (blank slots excluded)."""
        return self.token_ids[:self.num_tokens]

    def get_last_token_id(self) -> int:
        """Return the most recently appended token ID.

        Raises:
            AssertionError: If the block is empty.
        """
        # Explicit raise: under ``python -O`` a bare ``assert`` would vanish
        # and index -1 would return a blank placeholder instead of failing.
        if self.num_tokens <= 0:
            raise AssertionError(
                'Cannot get the last token of an empty block.')
        return self.token_ids[self.num_tokens - 1]

Woosuk Kwon's avatar
Woosuk Kwon committed
49
50

class PhysicalTokenBlock:
    """Represents the state of a block in the KV cache.

    Holds the device the block belongs to, its block number and size, and a
    reference counter that starts at zero.
    """

    def __init__(
        self,
        device: Device,
        block_number: int,
        block_size: int,
    ) -> None:
        """Record the block's identity and start with no references.

        Args:
            device: Device stored on the block as ``device``.
            block_number: Number stored on the block as ``block_number``.
            block_size: Size stored on the block as ``block_size``.
        """
        # Nothing in this class changes ref_count after construction;
        # presumably the owner of the block maintains it -- verify at callers.
        self.ref_count = 0

        self.block_size = block_size
        self.block_number = block_number
        self.device = device

    def __repr__(self) -> str:
        """Return a debug string with device, block number and ref count."""
        shown_fields = ', '.join((
            f'device={self.device}',
            f'block_number={self.block_number}',
            f'ref_count={self.ref_count}',
        ))
        return f'PhysicalTokenBlock({shown_fields})'
69
70
71
72


# Mapping: logical block number -> physical block.
# A block table is a list whose index is the logical block number and whose
# element at that index is the PhysicalTokenBlock it maps to.
BlockTable = List[PhysicalTokenBlock]