# interfaces.py
1
from abc import ABC, abstractmethod
2
from dataclasses import dataclass
3
from typing import Optional
4
5
6

import torch

7
from vllm.sequence import ExecuteModelRequest
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26


@dataclass
class SpeculativeProposals:
    """Datastructure used to represent proposal tokens from some proposer. It
    also tracks how many speculative tokens each sequence has.
    """

    # Speculative proposal tokens.
    proposal_token_ids: torch.Tensor

    # Probabilities of the proposal tokens according to the proposer.
    proposal_probs: torch.Tensor

    # The valid length of each proposal; can be zero.
    proposal_lens: torch.Tensor

    def __repr__(self) -> str:
        """Return a debugging summary of the proposals.

        The token ids and proposal lengths are formatted in full, while
        only the shape of ``proposal_probs`` is included.
        """
        return (f"SpeculativeProposals("
                f"proposal_token_ids={self.proposal_token_ids}, "
                f"proposal_probs={self.proposal_probs.shape}, "
                f"proposal_lens={self.proposal_lens})")
30
31
32
33
34
35
36
37
38
39
40


@dataclass
class SpeculativeScores:
    """Datastructure used to represent the scores of speculative tokens
    according to the scoring model.
    """

    # Probabilities of the speculative tokens according to the scoring model.
    probs: torch.Tensor

    # Log-probabilities of the speculative tokens according to the scoring
    # model. These values can be used to generate Logprob objects that are
    # returned to the user.
    logprobs: torch.Tensor

    # Token ids sampled from the scoring model. Used for speculative bonus
    # tokens and also non-speculative normal decoding.
    token_ids: torch.Tensor

    # Optional last hidden states from the scoring model.
    hidden_states: Optional[torch.Tensor] = None

    def __repr__(self) -> str:
        """Return a debugging summary of the scores.

        Only the shapes of ``probs`` and ``token_ids`` are included;
        ``logprobs`` and ``hidden_states`` are not part of the output.
        """
        return (f"SpeculativeScores("
                f"probs={self.probs.shape}, "
                f"token_ids={self.token_ids.shape})")


class SpeculativeProposer(ABC):
    """Abstract interface for components that produce speculative
    proposals for an execute-model request.
    """

    @abstractmethod
    def get_spec_proposals(
        self,
        execute_model_req: ExecuteModelRequest,
    ) -> SpeculativeProposals:
        """Produce speculative proposals for the given request.

        Args:
            execute_model_req: The request to generate proposals for.

        Returns:
            The proposals, as a ``SpeculativeProposals`` instance.

        Raises:
            NotImplementedError: Always, in this base implementation;
                concrete subclasses must override this method.
        """
        raise NotImplementedError


class SpeculativeScorer(ABC):
    """Abstract interface for components that score speculative
    proposals.
    """

    @abstractmethod
    def score_proposals(
        self,
        execute_model_req: ExecuteModelRequest,
        proposals: SpeculativeProposals,
    ) -> SpeculativeScores:
        """Score the given proposals for the given request.

        Args:
            execute_model_req: The request the proposals belong to.
            proposals: The speculative proposals to score.

        Returns:
            The scores, as a ``SpeculativeScores`` instance.

        Raises:
            NotImplementedError: Always, in this base implementation;
                concrete subclasses must override this method.
        """
        raise NotImplementedError