ggml.py 5.54 KB
Newer Older
Matt Hoffner's avatar
Matt Hoffner committed
1
import requests
2
import logging
Matt Hoffner's avatar
Matt Hoffner committed
3
import time
Matt Hoffner's avatar
Matt Hoffner committed
4
5
from tqdm import tqdm
from requests.exceptions import RequestException
6
import transformers
Matt Hoffner's avatar
Matt Hoffner committed
7
8
9
from lm_eval.utils import Reorderer
from lm_eval.base import BaseLM

10
11
logger = logging.getLogger(__name__)

Matt Hoffner's avatar
Matt Hoffner committed
12

Lorenzo's avatar
Lorenzo committed
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
def get_result(logprobs, context_lenght):
    """Extract the continuation log-likelihood and a greedy flag from an
    OpenAI-style ``logprobs`` payload.

    :param logprobs: dict with ``text_offset``, ``tokens``, ``token_logprobs``
        and ``top_logprobs`` lists, as returned by ``/v1/completions`` when
        called with ``echo=True`` and ``logprobs`` set.
    :param context_lenght: character length of the context prompt; tokens whose
        text offset falls before this point belong to the context.  (Parameter
        name is misspelled but kept as-is for call compatibility.)
    :return: ``(continuation_logprobs, is_greedy)`` where the sum excludes the
        final token (the single extra token generated by ``max_tokens=1``) and
        ``is_greedy`` is True iff every inspected token was the model's top
        choice.
    """
    offsets = logprobs['text_offset']
    tokens = logprobs['tokens']
    tokens_logprobs = logprobs['token_logprobs']

    # Skip tokens belonging to the context.  Bound the scan so we cannot run
    # past the end of `offsets` when every token lies inside the context
    # (the original loop raised IndexError in that case).
    idx = 0
    while idx < len(offsets) and offsets[idx] < context_lenght:
        idx += 1

    # Exclude the trailing generated token (echo=True + max_tokens=1 appends
    # one token past the echoed prompt+continuation).
    continuation_logprobs = sum(tokens_logprobs[idx:-1])

    is_greedy = True
    for i in range(idx, len(tokens)):
        token = tokens[i]
        top_tokens = logprobs["top_logprobs"][i]
        top_token = max(top_tokens.keys(), key=lambda x: top_tokens[x])
        if top_token != token:
            is_greedy = False
            break

    return continuation_logprobs, is_greedy


Matt Hoffner's avatar
Matt Hoffner committed
34
class GGMLLM(BaseLM):
Matt Hoffner's avatar
Matt Hoffner committed
35
36
37
38
    def __init__(self, base_url, truncate=False):
        """LM client for a ggml server exposing an OpenAI-style
        ``/v1/completions`` endpoint.

        :param base_url: root URL of the server (routes are appended to it).
        :param truncate: stored for interface compatibility; unused here.
        """
        super().__init__()

        self.truncate = truncate
        self.base_url = base_url

        # Request defaults sent with every completion call.
        self.temperature = 0.0
        # NOTE: misspelled attribute name ("logpobs") is kept verbatim because
        # ggml_completion reads it under exactly this name.
        self.logpobs = 10

        # Client-side GPT-2 tokenizer, used only for local encode/decode.
        self.tokenizer = transformers.GPT2TokenizerFast.from_pretrained("gpt2")
        self.vocab_size = self.tokenizer.vocab_size

        # Assumed maximum context window; this instance attribute shadows the
        # ``max_length`` method defined further down in the class.
        self.max_length = 1024

45
    def ggml_completion(self, context, continuation=None, stop=None, retries=3, delay=5, **kwargs):
        """POST a completion request to the server, retrying on network errors.

        :param context: prompt text.
        :param continuation: if given, appended to the prompt and scored via
            ``echo=True`` with ``max_tokens=1`` (loglikelihood mode).
        :param stop: optional stop sequence(s) forwarded to the server.
        :param retries: number of attempts before giving up.
        :param delay: seconds to sleep between attempts.
        :return: decoded JSON response body.
        :raises Exception: when all attempts fail.
        """
        for attempt in range(retries):
            try:
                prompt = context
                request = {'prompt': prompt, 'logprobs': self.logpobs,
                           'temperature': self.temperature}

                if continuation:
                    prompt += continuation
                    request.update({'prompt': prompt, 'max_tokens': 1, 'echo': True})

                if stop is not None:
                    request['stop'] = stop

                # Bounded timeout so an unresponsive server cannot hang the
                # evaluation forever (the original call had no timeout).
                response = requests.post(f"{self.base_url}/v1/completions", json=request, timeout=60)
                response.raise_for_status()
                return response.json()
            except RequestException as e:
                logger.error(f"RequestException: {e}")
                # Only sleep when another attempt follows; the original code
                # also slept after the final failure, delaying the raise.
                if attempt < retries - 1:
                    time.sleep(delay)  # wait before retrying
        raise Exception(f"Failed to get a valid response after {retries} retries.")
64

Matt Hoffner's avatar
Matt Hoffner committed
65
    def loglikelihood(self, requests):
        """Compute log-likelihoods of continuations given contexts.

        :param requests: iterable of ``(context, continuation)`` string pairs.
        :return: list of ``(logprob, is_greedy)`` tuples.  Requests whose
            response lacks usable logprobs are logged and skipped, so the list
            may be shorter than the input.
        :raises AssertionError: when the server returns a response without a
            usable ``choices`` list.
        """
        if not requests:
            return []

        res = []
        for context, continuation in tqdm(requests):
            response = self.ggml_completion(context=context, continuation=continuation)

            if response and "choices" in response and response["choices"]:
                choice = response["choices"][0]
                logprobs = choice.get("logprobs")
                if logprobs and "token_logprobs" in logprobs and logprobs["token_logprobs"]:
                    # Context length is measured in characters; get_result maps
                    # it onto the token text offsets.
                    logprob, is_greedy = get_result(logprobs, len(context))
                    res.append((logprob, is_greedy))
                else:
                    logger.warning("Invalid logprobs data. Expected 'logprobs' to contain 'token_logprobs' list.")
            else:
                logger.error(f"Invalid response for loglikelihood. Response: {response}")
                # Explicit raise instead of the original bare ``assert False``:
                # asserts are stripped under ``python -O`` and would silently
                # return a truncated result list.
                raise AssertionError(f"Invalid response for loglikelihood. Response: {response}")
        return res
Matt Hoffner's avatar
Matt Hoffner committed
83
84
85
86
87
88
89
90
91
92

    def greedy_until(self, requests):
        """Generate text greedily for each request, honoring per-request stop
        sequences.

        Each request is ``(context, {"until": ...})``.  Malformed responses are
        logged and contribute ``None``, so the output always has one entry per
        request.
        """
        if not requests:
            return []

        outputs = []
        for request in tqdm(requests):
            inp, request_args = request[0], request[1]
            until = request_args["until"]
            response = self.ggml_completion(context=inp, stop=until)

            # Guard clauses: bail out to the error path on any malformed shape.
            if not (response and "choices" in response and response["choices"]):
                logger.error(f"Invalid response for greedy_until. Response: {response}")
                outputs.append(None)  # keep one entry per request
                continue

            choice = response["choices"][0]
            if "text" not in choice:
                logger.error(f"Invalid response for greedy_until. Response: {response}")
                outputs.append(None)  # keep one entry per request
                continue

            outputs.append(choice["text"].strip())

        return outputs
Matt Hoffner's avatar
Matt Hoffner committed
106

Matt Hoffner's avatar
Matt Hoffner committed
107
108
109
110
111
112
    def loglikelihood_rolling(self, requests):
        """Rolling log-likelihood over long sequences.

        Each request (treated as a character sequence) is split into
        ``self.max_length``-sized chunks; each chunk is scored against the same
        window shifted one position ahead, and the per-chunk log-likelihoods
        are averaged.

        :param requests: iterable of sequences (e.g. strings) to score.
        :return: list of ``(avg_loglikelihood, True)`` tuples.  Empty requests
            yield ``(0.0, True)`` instead of dividing by zero.
        """
        results = []

        for request in requests:
            logprobs = []
            for i in range(0, len(request), self.max_length):
                chunk = request[i:i + self.max_length]
                # Target window: the chunk span shifted one position ahead.
                chunk_loglikelihood = self.loglikelihood([(chunk, request[i + 1:i + self.max_length + 1])])
                logprobs.extend(chunk_loglikelihood)

            if logprobs:
                avg_loglikelihood = sum(logprob for logprob, _ in logprobs) / len(logprobs)
            else:
                # Empty request produced no chunks; the original code raised
                # ZeroDivisionError here.
                avg_loglikelihood = 0.0
            results.append((avg_loglikelihood, True))

        return results
Matt Hoffner's avatar
Matt Hoffner committed
121

122
123
124
125
126
127
128
129
    def _model_call(self, inps):
        # Unused by this class: inference happens server-side over HTTP
        # (see ggml_completion), so the tensor-level hook stays unimplemented.
        # Placeholder implementation
        raise NotImplementedError()

    def _model_generate(self, context, max_length, eos_token_id):
        # Unused by this class: generation is delegated to the remote server
        # via greedy_until/ggml_completion.
        # Placeholder implementation
        raise NotImplementedError()

130
131
132
133
134
    def tok_encode(self, string: str):
        """Tokenize *string* with the client-side GPT-2 tokenizer, without
        adding special tokens."""
        token_ids = self.tokenizer.encode(string, add_special_tokens=False)
        return token_ids

    def tok_decode(self, tokens):
        """Inverse of ``tok_encode``: render token ids back into text."""
        text = self.tokenizer.decode(tokens)
        return text
Lorenzo's avatar
Lorenzo committed
135

136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
    @property
    def batch_size(self):
        # BaseLM hook; irrelevant for a one-request-at-a-time HTTP client.
        # Placeholder implementation
        raise NotImplementedError()

    @property
    def device(self):
        # BaseLM hook; there is no local device — compute runs on the server.
        # Placeholder implementation
        raise NotImplementedError()

    @property
    def eot_token_id(self):
        # BaseLM hook; end-of-text handling is left to the server.
        # Placeholder implementation
        raise NotImplementedError()

Lorenzo's avatar
Lorenzo committed
151
    def max_length(self):
        # NOTE(review): effectively dead code — __init__ assigns
        # ``self.max_length = 1024`` and that instance attribute shadows this
        # method, so normal attribute lookup returns the int, never this
        # callable.  Presumably BaseLM expects ``max_length`` as a property;
        # verify against BaseLM before relying on this method.
        return self.max_length
153
154
155
156

    @property
    def max_gen_toks(self):
        # BaseLM hook for maximum generation length; not needed because the
        # server decides when to stop (see greedy_until's stop sequences).
        # Placeholder implementation
        raise NotImplementedError()