ggml.py 3.98 KB
Newer Older
Matt Hoffner's avatar
Matt Hoffner committed
1
import requests
2
import logging
Matt Hoffner's avatar
Matt Hoffner committed
3
4
import time
from lm_eval.utils import Reorderer
5
from lm_eval.base import BaseLM
Matt Hoffner's avatar
Matt Hoffner committed
6
7
8
from tqdm import tqdm
from requests.exceptions import RequestException

9
10
logger = logging.getLogger(__name__)

Matt Hoffner's avatar
Matt Hoffner committed
11
12
13
14
15
16
17
18
19
20
21
def ggml_completion(base_url, retries=3, delay=5, **kwargs):
    """POST a completion request to a GGML server, retrying on request errors.

    The keyword arguments are sent as the JSON body of a POST to
    ``{base_url}/v1/completions``.

    Args:
        base_url: Server root URL; ``/v1/completions`` is appended to it.
        retries: Maximum number of attempts. A value of 0 or less makes no
            request and fails immediately.
        delay: Seconds to sleep between two consecutive attempts.
        **kwargs: Serialized as the JSON payload of the request.

    Returns:
        The decoded JSON body of the first successful response.

    Raises:
        RuntimeError: If no attempt produced a valid response. The last
            ``RequestException`` (if any) is attached as ``__cause__``.
    """
    last_error = None
    for attempt in range(1, retries + 1):
        try:
            response = requests.post(f"{base_url}/v1/completions", json=kwargs)
            response.raise_for_status()
            return response.json()
        except RequestException as exc:
            # The ``as`` target is unbound when this handler exits, so keep
            # our own reference for the final error message.
            last_error = exc
            logger.error("RequestException: %s", exc)
            # Sleeping after the final attempt would only delay the failure.
            if attempt < retries:
                time.sleep(delay)
    raise RuntimeError(
        f"Failed to get a valid response after {retries} retries. "
        f"Last exception: {last_error}"
    ) from last_error
Matt Hoffner's avatar
Matt Hoffner committed
22

Matt Hoffner's avatar
Matt Hoffner committed
23
class GGMLLM(BaseLM):
    """Language-model adapter that talks to a GGML server over HTTP.

    ``loglikelihood`` and ``greedy_until`` send one request per item through
    ``ggml_completion``. The remaining hooks are placeholders that raise
    ``NotImplementedError``.
    """

    def __init__(self, base_url, truncate=False):
        """Store the connection settings.

        Args:
            base_url: Server root URL handed to ``ggml_completion``.
            truncate: Stored on the instance; no method in this class reads it.
        """
        super().__init__()
        self.base_url = base_url
        self.truncate = truncate

    @staticmethod
    def _first_choice(response):
        """Return the first entry of ``response["choices"]``, or None if absent or empty."""
        if response and "choices" in response and response["choices"]:
            return response["choices"][0]
        return None

    def loglikelihood(self, requests):
        """Score each ``(context, continuation)`` pair with the server.

        Args:
            requests: Sequence of ``(context, continuation)`` pairs. The name
                shadows the ``requests`` module inside this method only.

        Returns:
            A list of ``(logprob, is_greedy)`` tuples in the order of the
            input. ``logprob`` is the first entry of the response's
            ``token_logprobs``; ``is_greedy`` is True when the response's
            ``finish_reason`` equals ``"length"``.

        Raises:
            ValueError: If a response has no usable ``choices`` entry or its
                ``logprobs`` lacks a non-empty ``token_logprobs`` list.
        """
        if not requests:
            return []

        reorderer = Reorderer(requests, len)

        res = []
        for context, continuation in tqdm(reorderer.get_reordered()):
            response = ggml_completion(self.base_url, context=context, continuation=continuation)
            choice = self._first_choice(response)
            if choice is None:
                logger.error(f"Invalid response for loglikelihood. Response: {response}")
                # Raise instead of ``assert False``: asserts are stripped under
                # ``-O``, which would silently drop this item and misalign the
                # results with the requests.
                raise ValueError(f"Invalid response for loglikelihood. Response: {response}")

            logprobs = choice.get("logprobs")
            try:
                logprob = logprobs["token_logprobs"][0]
            except (TypeError, KeyError, IndexError) as exc:
                # Covers ``logprobs`` being None, the key being missing, and
                # the list being empty.
                raise ValueError(
                    "Invalid logprobs data. Expected 'logprobs' to contain 'token_logprobs' list."
                ) from exc
            # NOTE(review): "length" as the greedy indicator is kept from the
            # original code -- confirm against the server's finish_reason values.
            is_greedy = choice["finish_reason"] == "length"
            res.append((logprob, is_greedy))
        return reorderer.get_original(res)

    def greedy_until(self, requests):
        """Generate text for each request until one of its stop sequences.

        Args:
            requests: Sequence of ``(context, request_args)`` pairs, where
                ``request_args["until"]`` is sent to the server as ``stop``.
                The name shadows the ``requests`` module inside this method only.

        Returns:
            A list, in the order of the input, holding the stripped generated
            text for each request, or None where the response was invalid.
        """
        if not requests:
            return []

        reorderer = Reorderer(requests, len)

        res = []
        for request in tqdm(reorderer.get_reordered()):
            inp = request[0]
            request_args = request[1]
            until = request_args["until"]
            response = ggml_completion(self.base_url, context=inp, stop=until)
            logger.debug("greedy_until response: %s", response)

            choice = self._first_choice(response)
            if choice is not None and "text" in choice:
                res.append(choice["text"].strip())
            else:
                logger.error(f"Invalid response for greedy_until. Response: {response}")
                res.append(None)  # Add default value in case of error
        return reorderer.get_original(res)

    def _model_call(self, inps):
        """Placeholder; not implemented for this backend."""
        raise NotImplementedError()

    def _model_generate(self, context, max_length, eos_token_id):
        """Placeholder; not implemented for this backend."""
        raise NotImplementedError()

    @property
    def batch_size(self):
        """Placeholder; not implemented for this backend."""
        raise NotImplementedError()

    @property
    def device(self):
        """Placeholder; not implemented for this backend."""
        raise NotImplementedError()

    @property
    def eot_token_id(self):
        """Placeholder; not implemented for this backend."""
        raise NotImplementedError()

    @property
    def max_length(self):
        """Placeholder; not implemented for this backend."""
        raise NotImplementedError()

    @property
    def max_gen_toks(self):
        """Placeholder; not implemented for this backend."""
        raise NotImplementedError()

    def tok_encode(self, string: str):
        """Placeholder; not implemented for this backend."""
        raise NotImplementedError()

    def tok_decode(self, tokens):
        """Placeholder; not implemented for this backend."""
        raise NotImplementedError()