"vllm/vscode:/vscode.git/clone" did not exist on "29acd2c34cc542c96dbb584ea089f4b5404e54ef"
utils.py 928 Bytes
Newer Older
1
# SPDX-License-Identifier: Apache-2.0
2
3
"""Block manager utils."""
from vllm.sequence import SequenceGroup
4
5
from vllm.utils import (STR_NOT_IMPL_ENC_DEC_PREFIX_CACHE,
                        STR_NOT_IMPL_ENC_DEC_SWA)
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22


def check_no_caching_or_swa_for_blockmgr_encdec(
        block_mgr, seq_group: SequenceGroup) -> None:
    """
    Enforce that prefix caching & sliding-window attention (SWA)
    are currently unsupported *specifically* for encoder/decoder models.

    Nothing is checked when `seq_group.is_encoder_decoder()` is falsy;
    the function then returns without reading `block_mgr`.

    Arguments:

    * block_mgr: BlockSpaceManager instance; its
      `max_block_sliding_window` and `enable_caching` attributes are read
    * seq_group: SequenceGroup passed to block_mgr

    Raises:

    * NotImplementedError (STR_NOT_IMPL_ENC_DEC_SWA) if the sequence group
      is encoder/decoder and `block_mgr.max_block_sliding_window` is not
      None
    * NotImplementedError (STR_NOT_IMPL_ENC_DEC_PREFIX_CACHE) if the
      sequence group is encoder/decoder and `block_mgr.enable_caching`
      is truthy
    """
    # The restrictions below apply only to encoder/decoder sequence groups.
    if not seq_group.is_encoder_decoder():
        return

    # The SWA check runs first, so it takes precedence when both
    # unsupported features are enabled on the same block manager.
    if block_mgr.max_block_sliding_window is not None:
        raise NotImplementedError(STR_NOT_IMPL_ENC_DEC_SWA)

    if block_mgr.enable_caching:
        raise NotImplementedError(STR_NOT_IMPL_ENC_DEC_PREFIX_CACHE)