utils.py 887 Bytes
Newer Older
1
# SPDX-License-Identifier: Apache-2.0
2
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
3
"""Attention backend utils"""
4
from dataclasses import dataclass
5
from typing import Optional
6

7
from vllm.config import ModelConfig
8
from vllm.logger import init_logger
9

10
# Module-level logger, created via vLLM's init_logger and keyed by this module's name.
logger = init_logger(__name__)
11

12
13
# Sentinel slot id. NOTE(review): presumably marks padded (non-real) slots so
# consumers can skip them -- not used in this file; confirm against callers.
PAD_SLOT_ID = -1

14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33

@dataclass
class MLADims:
    """Bundle of the dimension settings that ``get_mla_dims`` extracts.

    Each field mirrors the attribute of the same name on a model's
    ``hf_text_config``. (NOTE(review): "MLA" presumably refers to
    multi-head latent attention -- confirm.)

    The declaration order below is part of the interface: it fixes the
    positional argument order of the generated ``__init__`` as well as the
    layout of ``__repr__`` and ``__eq__``.
    """

    # May be None: ``get_mla_dims`` falls back to None when the config does
    # not define this attribute.
    q_lora_rank: Optional[int]
    # The remaining fields are always populated by ``get_mla_dims``.
    kv_lora_rank: int
    qk_nope_head_dim: int
    qk_rope_head_dim: int
    v_head_dim: int


def get_mla_dims(model_config: ModelConfig) -> MLADims:
    """Collect the MLA dimension settings from a model configuration.

    Args:
        model_config: Model configuration whose ``hf_text_config`` carries
            the dimension attributes.

    Returns:
        An ``MLADims`` populated from ``model_config.hf_text_config``.
        ``q_lora_rank`` is ``None`` when the config does not define it.

    Raises:
        AttributeError: Normally raised if the config lacks ``kv_lora_rank``,
            ``qk_nope_head_dim``, ``qk_rope_head_dim`` or ``v_head_dim``,
            since those are read with plain attribute access.
    """
    text_cfg = model_config.hf_text_config

    # Only q_lora_rank is treated as optional; every other attribute is
    # required. Reads happen in the same order as the fields are declared.
    q_lora_rank = getattr(text_cfg, "q_lora_rank", None)
    kv_lora_rank = text_cfg.kv_lora_rank
    qk_nope_head_dim = text_cfg.qk_nope_head_dim
    qk_rope_head_dim = text_cfg.qk_rope_head_dim
    v_head_dim = text_cfg.v_head_dim

    return MLADims(
        q_lora_rank=q_lora_rank,
        kv_lora_rank=kv_lora_rank,
        qk_nope_head_dim=qk_nope_head_dim,
        qk_rope_head_dim=qk_rope_head_dim,
        v_head_dim=v_head_dim,
    )