Unverified Commit e461c262 authored by yangzhibin's avatar yangzhibin Committed by GitHub
Browse files

[Misc] Remove unused vllm/block.py (#11336)

parent 276738ce
"""Token blocks."""
from typing import TYPE_CHECKING, Iterator, List, Optional
from vllm.utils import Device
# Value given to PhysicalTokenBlock.last_accessed when a block is constructed.
# NOTE(review): presumably a sentinel for "never accessed" -- confirm with callers.
DEFAULT_LAST_ACCESSED_TIME: float = -1
class PhysicalTokenBlock:
    """Represents the state of a block in the KV cache.

    The constructor stores its five arguments unchanged as attributes of
    the same name and initialises three bookkeeping attributes:

    * ``ref_count`` starts at ``0``.
    * ``last_accessed`` starts at ``DEFAULT_LAST_ACCESSED_TIME``.
    * ``computed`` starts as ``False``.
    """

    def __init__(
        self,
        device: Device,
        block_number: int,
        block_size: int,
        block_hash: int,
        num_hashed_tokens: int,
    ) -> None:
        # Caller-supplied description of the block.
        self.device = device
        self.block_number = block_number
        self.block_size = block_size
        self.block_hash = block_hash
        self.num_hashed_tokens = num_hashed_tokens

        # Bookkeeping state; every new block starts from the same values.
        self.ref_count = 0
        self.last_accessed = DEFAULT_LAST_ACCESSED_TIME
        self.computed = False

    def __repr__(self) -> str:
        """Return a one-line summary of the block.

        ``block_size`` and ``block_hash`` are not part of the summary.
        """
        pieces = [
            f'PhysicalTokenBlock(device={self.device}, ',
            f'block_number={self.block_number}, ',
            f'num_hashed_tokens={self.num_hashed_tokens}, ',
            f'ref_count={self.ref_count}, ',
            f'last_accessed={self.last_accessed}, ',
            f'computed={self.computed})',
        ]
        return ''.join(pieces)
class BlockTable:
    """Holds a list of blocks with caching of their associated block_ids.

    Two parallel lists are maintained: ``_blocks`` (the block objects) and
    ``_block_ids`` (each block's ``block_number``).  Every mutating method
    updates both so that ``_block_ids[i] == _blocks[i].block_number``.
    """

    def __init__(self, blocks: Optional[List["PhysicalTokenBlock"]] = None):
        """Create a table, optionally pre-populated from ``blocks``.

        Args:
            blocks: Blocks to append in order. ``None`` yields an empty
                table. The passed list itself is not stored.
        """
        self._blocks: List["PhysicalTokenBlock"] = []
        self._block_ids: List[int] = []
        if blocks is not None:
            for block in blocks:
                self.append(block)

    def append(self, block: "PhysicalTokenBlock"):
        """Append ``block`` and record its ``block_number``."""
        # Read the id first so a bad object cannot leave the lists uneven.
        block_id = block.block_number
        self._blocks.append(block)
        self._block_ids.append(block_id)

    def __len__(self) -> int:
        """Return the number of blocks in the table."""
        return len(self._blocks)

    def __getitem__(self, key):
        """Return the block at index ``key``, or a list of blocks for a slice."""
        return self._blocks[key]

    if TYPE_CHECKING:
        # Declared for static type checkers only; this body never exists at
        # runtime, where iteration falls back to the __getitem__ protocol.
        def __iter__(self) -> Iterator["PhysicalTokenBlock"]:
            raise RuntimeError("Method should be automatically generated")

    def __setitem__(self, key, value):
        """Replace one block (int ``key``) or a range of blocks (slice ``key``).

        Args:
            key: An index or a slice into the table.
            value: A single block for an index, or an iterable of blocks
                for a slice.
        """
        if isinstance(key, slice):
            # ``value`` is needed twice (blocks and their ids). Materialize
            # it once so that a one-shot iterable such as a generator is not
            # exhausted by the first use, which would desynchronize the
            # block list and the cached id list.
            blocks = list(value)
            block_ids = [b.block_number for b in blocks]
            self._blocks[key] = blocks
            self._block_ids[key] = block_ids
        else:
            block = value
            # Read the id before mutating so both lists change together.
            block_id = block.block_number
            self._blocks[key] = block
            self._block_ids[key] = block_id

    def reset(self):
        """Empty the table by rebinding both internal lists to new lists.

        Lists previously obtained from ``list()`` or ``ids()`` are left
        untouched.
        """
        self._blocks = []
        self._block_ids = []

    def copy(self) -> "BlockTable":
        """Return a new table holding the same block objects (shallow copy)."""
        return BlockTable(self._blocks)

    def list(self) -> List["PhysicalTokenBlock"]:
        """Return the internal block list itself, not a copy."""
        return self._blocks

    def ids(self) -> List[int]:
        """Return the internal cached list of block numbers, not a copy."""
        return self._block_ids
...@@ -13,7 +13,7 @@ class EvictionPolicy(enum.Enum): ...@@ -13,7 +13,7 @@ class EvictionPolicy(enum.Enum):
class Evictor(ABC): class Evictor(ABC):
"""The Evictor subclasses should be used by the BlockAllocator class to """The Evictor subclasses should be used by the BlockAllocator class to
handle eviction of freed PhysicalTokenBlocks. handle eviction of freed Blocks.
""" """
@abstractmethod @abstractmethod
...@@ -70,7 +70,7 @@ class BlockMetaData: ...@@ -70,7 +70,7 @@ class BlockMetaData:
class LRUEvictor(Evictor): class LRUEvictor(Evictor):
"""Evicts in a least-recently-used order using the last_accessed timestamp """Evicts in a least-recently-used order using the last_accessed timestamp
that's recorded in the PhysicalTokenBlock. If there are multiple blocks with that's recorded in the Block. If there are multiple blocks with
the same last_accessed time, then the one with the largest num_hashed_tokens the same last_accessed time, then the one with the largest num_hashed_tokens
will be evicted. If two blocks each have the lowest last_accessed time and will be evicted. If two blocks each have the lowest last_accessed time and
highest num_hashed_tokens value, then one will be chosen arbitrarily highest num_hashed_tokens value, then one will be chosen arbitrarily
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment