"examples/backends/vllm/launch/agg_omni_i2v.sh" did not exist on "f242b4552b8ae37d0a3c2a4f0438e57d6f4240f3"
metadata.py 2.3 KB
Newer Older
1
# SPDX-License-Identifier: Apache-2.0
2
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
3
from dataclasses import dataclass
王敏's avatar
王敏 committed
4
from typing import  Optional
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24

import numpy as np
import torch


@dataclass
class SpecDecodeMetadata:
    """Bundle of draft-token data and logits index tensors for one batch.

    In the shape comments below, ``batch_size`` is the number of per-entry
    draft-token lists and ``num_tokens`` is the total number of draft tokens
    summed over all entries (see :meth:`make_dummy`).

    After construction the instance also carries ``max_spec_len``, the
    largest value in ``num_draft_tokens`` (``0`` when the batch is empty).
    It is set in ``__post_init__`` and is not an ``__init__`` parameter.
    """

    # [num_tokens]
    draft_token_ids: torch.Tensor
    # [batch_size]
    num_draft_tokens: list[int]
    # [batch_size]
    cu_num_draft_tokens: torch.Tensor
    # [num_tokens]
    target_logits_indices: torch.Tensor
    # [batch_size]
    bonus_logits_indices: torch.Tensor
    # [num_tokens + batch_size]
    logits_indices: torch.Tensor
    # [batch_size]
    spec_decode_ids: Optional[list[str]] = None

    def __post_init__(self):
        """Derive ``max_spec_len`` from ``num_draft_tokens``."""
        # ``default=0`` keeps an empty batch from raising ValueError.
        self.max_spec_len = max(self.num_draft_tokens, default=0)

    @classmethod
    def make_dummy(
        cls,
        draft_token_ids: list[list[int]],
        device: torch.device,
    ) -> "SpecDecodeMetadata":
        """Build an instance with zero-filled index tensors.

        Args:
            draft_token_ids: One list of draft token ids per batch entry.
                Entries may be empty, and the outer list may be empty too.
            device: Device on which every tensor is created.

        Returns:
            A ``SpecDecodeMetadata`` whose ``draft_token_ids`` is the
            flattened input, whose ``cu_num_draft_tokens`` is the inclusive
            cumulative sum of the per-entry lengths, and whose three
            ``*_indices`` tensors are all zeros of the documented sizes.
            ``spec_decode_ids`` is left at its default of ``None``.
        """
        batch_size = len(draft_token_ids)
        num_draft_tokens = [len(ids) for ids in draft_token_ids]
        # Linear-time flatten; ``sum(lists, [])`` re-copies the accumulator
        # on every step and is quadratic in the number of lists.
        flattened_draft_token_ids = [
            token_id for ids in draft_token_ids for token_id in ids
        ]
        num_tokens = len(flattened_draft_token_ids)

        draft_token_ids_tensor = torch.tensor(flattened_draft_token_ids,
                                              dtype=torch.int32,
                                              device=device)
        cu_num_draft_tokens = np.cumsum(num_draft_tokens, dtype=np.int32)
        cu_num_draft_tokens_tensor = torch.from_numpy(cu_num_draft_tokens).to(
            device)

        target_logits_indices = torch.zeros(num_tokens,
                                            dtype=torch.int32,
                                            device=device)
        bonus_logits_indices = torch.zeros(batch_size,
                                           dtype=torch.int32,
                                           device=device)
        logits_indices = torch.zeros(num_tokens + batch_size,
                                     dtype=torch.int32,
                                     device=device)
        return cls(
            draft_token_ids=draft_token_ids_tensor,
            num_draft_tokens=num_draft_tokens,
            cu_num_draft_tokens=cu_num_draft_tokens_tensor,
            target_logits_indices=target_logits_indices,
            bonus_logits_indices=bonus_logits_indices,
            logits_indices=logits_indices,
        )