# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""Accessors for KV-compression state stored as attributes on a context object.

Each helper reads or writes a private attribute on the ``forward_context``
object it is given. The getters return ``None`` when nothing has been stored.
"""

from __future__ import annotations

from typing import Any, Optional

import torch

# Attribute names used to attach KV-compression state to the context object.
_PROMPT_PAYLOAD_ATTR = "_kv_compression_prompt_payload"
_COMPACT_SLOTS_ATTR = "_kv_compression_compact_slots"
_COMPACT_SLOTS_BY_LAYER_ATTR = "_kv_compression_compact_slots_by_layer"


def get_kv_compression_prompt_payload(
    forward_context: Any,
) -> Optional[dict[str, torch.Tensor]]:
    """Return the prompt payload stored on ``forward_context``.

    Returns:
        The object previously stored by ``set_kv_compression_prompt_payload``,
        or ``None`` if the attribute is not present on the context.
    """
    return getattr(forward_context, _PROMPT_PAYLOAD_ATTR, None)


def set_kv_compression_prompt_payload(
    forward_context: Any,
    payload: dict[str, torch.Tensor],
) -> None:
    """Store ``payload`` on ``forward_context``, replacing any earlier value.

    The payload is stored by reference; it is not copied or validated.
    """
    setattr(forward_context, _PROMPT_PAYLOAD_ATTR, payload)


def _kv_compression_layer_key(layer: Any) -> str:
    """Return the string key identifying ``layer`` in the per-layer mapping.

    Uses ``layer.layer_name`` when that attribute exists and is not ``None``;
    otherwise falls back to ``id(layer)``. The result is always a ``str``.
    """
    layer_name = getattr(layer, "layer_name", None)
    if layer_name is None:
        # No usable name: key on object identity, which is only meaningful
        # while this particular layer object is alive.
        layer_name = str(id(layer))
    return str(layer_name)


def get_kv_compression_compact_slots(
    forward_context: Any,
    *,
    per_layer_topk: bool,
    layer: Any,
) -> Optional[torch.Tensor]:
    """Return the compact-slots value stored on ``forward_context``.

    Args:
        forward_context: Object carrying the stored attributes.
        per_layer_topk: When true, look up the entry for ``layer`` in the
            per-layer mapping; when false, read the single shared value.
        layer: Layer whose key selects the per-layer entry. Ignored when
            ``per_layer_topk`` is false.

    Returns:
        The stored value, or ``None`` if nothing has been stored for the
        selected mode (and, in per-layer mode, for this layer).
    """
    if per_layer_topk:
        dst_by_layer = getattr(
            forward_context, _COMPACT_SLOTS_BY_LAYER_ATTR, None
        )
        if dst_by_layer is None:
            return None
        return dst_by_layer.get(_kv_compression_layer_key(layer))
    return getattr(forward_context, _COMPACT_SLOTS_ATTR, None)


def set_kv_compression_compact_slots(
    forward_context: Any,
    *,
    per_layer_topk: bool,
    layer: Any,
    dst: torch.Tensor,
) -> None:
    """Store the compact-slots value ``dst`` on ``forward_context``.

    Args:
        forward_context: Object that receives the stored attribute.
        per_layer_topk: When true, store ``dst`` under the key for ``layer``
            in the per-layer mapping; when false, store it as the single
            shared value.
        layer: Layer whose key selects the per-layer entry. Ignored when
            ``per_layer_topk`` is false.
        dst: Value to store; kept by reference, not copied.

    The two modes write to different attributes, so a value stored in one
    mode is not visible when reading in the other.
    """
    if per_layer_topk:
        dst_by_layer = getattr(
            forward_context, _COMPACT_SLOTS_BY_LAYER_ATTR, None
        )
        if dst_by_layer is None:
            # Create the per-layer mapping lazily on first use.
            dst_by_layer = {}
            setattr(forward_context, _COMPACT_SLOTS_BY_LAYER_ATTR, dst_by_layer)
        dst_by_layer[_kv_compression_layer_key(layer)] = dst
    else:
        setattr(forward_context, _COMPACT_SLOTS_ATTR, dst)