"""Token blocks."""

from typing import List

from vllm.utils import Device

# Placeholder stored in every slot of a LogicalTokenBlock that has not yet
# been filled with a real token ID.
_BLANK_TOKEN_ID = -1

# Initial value of PhysicalTokenBlock.last_accessed before any access time
# has been recorded.
DEFAULT_LAST_ACCESSED_TIME = -1


class LogicalTokenBlock:
    """A block that stores a contiguous chunk of tokens from left to right.

    Logical blocks are used to represent the states of the corresponding
    physical blocks in the KV cache.
    """

    def __init__(
        self,
        block_number: int,
        block_size: int,
    ) -> None:
        """Create an empty block with `block_size` token slots.

        Args:
            block_number: Number identifying this logical block.
            block_size: Maximum number of tokens the block can hold.
        """
        self.block_number = block_number
        self.block_size = block_size

        # Every slot starts as a blank placeholder; only the first
        # `num_tokens` entries of `token_ids` hold real token IDs.
        self.token_ids = [_BLANK_TOKEN_ID] * block_size
        self.num_tokens = 0

    def is_empty(self) -> bool:
        """Return True if no tokens have been appended yet."""
        return self.num_tokens == 0

    def get_num_empty_slots(self) -> int:
        """Return how many more tokens can be appended to this block."""
        return self.block_size - self.num_tokens

    def is_full(self) -> bool:
        """Return True if every slot holds a token."""
        return self.num_tokens == self.block_size

    def append_tokens(self, token_ids: List[int]) -> None:
        """Write `token_ids` into the next free slots, left to right.

        Args:
            token_ids: Token IDs to append; there must be no more of them
                than `get_num_empty_slots()`.

        Raises:
            AssertionError: If the tokens do not fit in the remaining slots.
        """
        # The check matters: an oversized slice assignment would silently
        # grow `token_ids` past `block_size` instead of failing.
        assert len(token_ids) <= self.get_num_empty_slots(), (
            f"cannot append {len(token_ids)} tokens: only "
            f"{self.get_num_empty_slots()} empty slots left")
        curr_idx = self.num_tokens
        self.token_ids[curr_idx:curr_idx + len(token_ids)] = token_ids
        self.num_tokens += len(token_ids)

    def get_token_ids(self) -> List[int]:
        """Return a new list with the tokens stored so far (no blanks)."""
        return self.token_ids[:self.num_tokens]

    def get_last_token_id(self) -> int:
        """Return the most recently appended token ID.

        Raises:
            AssertionError: If the block is empty.
        """
        assert self.num_tokens > 0, "block holds no tokens"
        return self.token_ids[self.num_tokens - 1]


class PhysicalTokenBlock:
    """Represents the state of a block in the KV cache."""

    def __init__(
        self,
        device: Device,
        block_number: int,
        block_size: int,
        block_hash: int,
        num_hashed_tokens: int,
    ) -> None:
        """Record the block's identity and reset its bookkeeping fields.

        Args:
            device: Device this block is associated with.
            block_number: Number identifying this physical block.
            block_size: Size of the block (presumably in token slots, as
                for LogicalTokenBlock -- confirm against the allocator).
            block_hash: Hash value associated with this block.
            num_hashed_tokens: Stored as given; presumably the number of
                tokens covered by `block_hash` -- confirm against callers.
        """
        self.device = device
        self.block_number = block_number
        self.block_size = block_size
        self.block_hash = block_hash
        self.num_hashed_tokens = num_hashed_tokens

        # Bookkeeping starts from a neutral state: no references, no
        # recorded access time, and not marked as computed.
        self.ref_count = 0
        self.last_accessed = DEFAULT_LAST_ACCESSED_TIME

        self.computed = False

    def __repr__(self) -> str:
        """Return a debug string (block_size and block_hash are omitted)."""
        return (f'PhysicalTokenBlock(device={self.device}, '
                f'block_number={self.block_number}, '
                f'num_hashed_tokens={self.num_hashed_tokens}, '
                f'ref_count={self.ref_count}, '
                f'last_accessed={self.last_accessed}, '
                f'computed={self.computed})')



# Mapping: logical block number -> physical block.
# The list index is the logical block number; the element at that index is
# the PhysicalTokenBlock it maps to.
BlockTable = List[PhysicalTokenBlock]