model.py 2.73 KB
Newer Older
1
2
import torch

3
from abc import ABC, abstractmethod
4
from typing import List, Tuple, Optional, TypeVar, Type
5
from transformers import PreTrainedTokenizerBase
6

7
from text_generation_server.models.types import Batch, GeneratedText
8

9
10
# Type variable for the concrete ``Batch`` subclass a ``Model`` works with;
# used in the annotations of ``Model.batch_type`` and ``Model.generate_token``.
B = TypeVar("B", bound=Batch)

11

12
class Model(ABC):
    """Abstract base class for text-generation models.

    Stores the tokenizer, the device and the size of the token window used
    for incremental decoding. Subclasses must provide ``batch_type`` and
    ``generate_token``; ``decode_token`` is shared by all of them.
    """

    def __init__(
        self,
        tokenizer: PreTrainedTokenizerBase,
        device: torch.device,
        decode_buffer: int = 3,
    ):
        """Store the tokenizer, device and decoding window size.

        Args:
            tokenizer: Tokenizer used by ``decode_token``.
            device: Device stored on the instance as ``self.device``.
            decode_buffer: Number of trailing tokens (including the newest
                one) decoded together by ``decode_token``.

        Raises:
            ValueError: If ``decode_buffer`` is smaller than 1.
        """
        if decode_buffer < 1:
            raise ValueError("decode_buffer must be >= 1")

        self.tokenizer = tokenizer
        # Kept as a set so the membership test in decode_token is O(1).
        self.all_special_ids = set(tokenizer.all_special_ids)
        self.device = device
        self.decode_buffer = decode_buffer

    @property
    @abstractmethod
    def batch_type(self) -> Type[B]:
        """Return the ``Batch`` subclass used by this model."""
        raise NotImplementedError

    @abstractmethod
    def generate_token(self, batch: B) -> Tuple[List[GeneratedText], Optional[B]]:
        """Run one generation step on ``batch``.

        Returns:
            A tuple of the generated texts and an optional batch.
        """
        raise NotImplementedError

    def decode_token(
        self,
        all_input_ids: List[int],
        offset: Optional[int] = None,
        token_offset: Optional[int] = None,
    ) -> Tuple[str, Optional[int], Optional[int]]:
        """Decode the newest token of ``all_input_ids`` into text.

        Hack to hopefully support generate_stream for the maximum number of
        tokenizers: the newest token is decoded together with up to
        ``decode_buffer - 1`` preceding tokens, and only the text located
        after ``offset`` in that decoded window is returned.

        Args:
            all_input_ids: Token ids so far; the last one is the token to
                decode. Must not be empty.
            offset: Character position in the decoded window where the new
                text starts. Required when ``token_offset`` is given.
            token_offset: Index in ``all_input_ids`` where the decoded window
                starts. ``None`` means the window is computed from
                ``decode_buffer``.

        Returns:
            ``(text, None, None)`` when text could be produced, or
            ``("", offset, token_offset)`` when the decoded text is empty or
            ends with U+FFFD. In the second case the two offsets are meant to
            be passed back on a later call once more tokens are available.

        Raises:
            ValueError: If ``token_offset`` is given without ``offset``.
            IndexError: If ``all_input_ids`` is empty.
        """
        last_id = all_input_ids[-1]

        # Special tokens are decoded on their own, without any left context.
        if last_id in self.all_special_ids:
            return (
                self.tokenizer.decode(last_id, skip_special_tokens=False),
                None,
                None,
            )

        if token_offset is None:
            # Clamp at 0: with fewer than decode_buffer tokens a negative
            # value would be read as an index from the end, which shrinks the
            # window and hands the caller an offset that points somewhere
            # else once the sequence grows.
            token_offset = max(0, len(all_input_ids) - self.decode_buffer)
            if self.decode_buffer > 1:
                # Decode the window without and with the newest token; the
                # length of the first text is where the new text starts.
                raw_texts = self.tokenizer.batch_decode(
                    [all_input_ids[token_offset:-1], all_input_ids[token_offset:]],
                    skip_special_tokens=False,
                )
                offset = len(raw_texts[0])
                sequence_text = raw_texts[1]
            else:
                # No token buffer: decode only the newest token, from its start.
                sequence_text = self.tokenizer.decode(
                    last_id, skip_special_tokens=False
                )
                offset = 0
        else:
            # Explicit check instead of assert, which is stripped under -O and
            # would let sequence_text[None:] silently return the whole window.
            if offset is None:
                raise ValueError("offset must be provided together with token_offset")
            sequence_text = self.tokenizer.decode(
                all_input_ids[token_offset:],
                skip_special_tokens=False,
            )

        token_text = sequence_text[offset:]

        # A trailing U+FFFD (replacement character) presumably means the
        # tokens so far end in the middle of a multi-byte character; hold the
        # text back and return the offsets instead.
        if token_text and token_text[-1] != "\ufffd":
            return token_text, None, None
        return "", offset, token_offset