Commit ac9f4be2 authored by Matt Hoffner

rename to ggml

parent 3ee4c2e2
lm_eval/models/__init__.py

```diff
@@ -4,7 +4,7 @@ from . import anthropic_llms
 from . import huggingface
 from . import textsynth
 from . import dummy
-from . import llama
+from . import ggml
 
 MODEL_REGISTRY = {
     "hf": gpt2.HFLM,
@@ -16,7 +16,7 @@ MODEL_REGISTRY = {
     "anthropic": anthropic_llms.AnthropicLM,
     "textsynth": textsynth.TextSynthLM,
     "dummy": dummy.DummyLM,
-    "llama": llama.LlamaCppLM
+    "ggml": ggml.GGMLLM
 }
```
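For orientation: MODEL_REGISTRY is how a model name on the command line resolves to an adapter class. A minimal sketch of that lookup, assuming a get_model-style helper and a local server URL (neither appears in this diff):

```python
# Sketch only: the get_model helper and base_url value are assumptions,
# not part of this commit; MODEL_REGISTRY itself is from the diff above.
from lm_eval.models import MODEL_REGISTRY

def get_model(model_name):
    # Resolve a CLI model name to its adapter class.
    return MODEL_REGISTRY[model_name]

lm_class = get_model("ggml")   # after this commit: ggml.GGMLLM
lm = lm_class(base_url="http://localhost:8000")
```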
lm_eval/models/ggml.py

```diff
@@ -9,7 +9,7 @@ import time
 
 logger = logging.getLogger(__name__)
 
-def llama_completion(base_url, prompt, **kwargs):
+def ggml_completion(base_url, prompt, **kwargs):
     try:
         response = requests.post(f"{base_url}/v1/completions", json=kwargs)
         response.raise_for_status()
@@ -18,7 +18,7 @@ def llama_completion(base_url, prompt, **kwargs):
         print(f"RequestException: {e}")
         return None
 
-class LlamaCppLM(BaseLM):
+class GGMLLM(BaseLM):
     def __init__(self, base_url, truncate=False):
         super().__init__()
         self.base_url = base_url
@@ -27,7 +27,7 @@ class LlamaCppLM(BaseLM):
     def loglikelihood(self, requests):
         res = []
         for context, continuation in tqdm(requests):
-            response = llama_completion(self.base_url, context, continuation=continuation)
+            response = ggml_completion(self.base_url, context, continuation=continuation)
             print(f"Loglikelihood response: {response}")
             if response and "choices" in response and response["choices"]:
                 choice = response["choices"][0]
@@ -49,7 +49,7 @@ class LlamaCppLM(BaseLM):
             inp = request[0]
             request_args = request[1]
             until = request_args["until"]
-            response = self.llama_completion(inp, context=res, stop=until) # Pass the context
+            response = self.ggml_completion(inp, context=res, stop=until) # Pass the context
             print(f"Greedy_until response: {response}")
             if response and "text" in response:
                 generated_text = response["text"].strip()
```
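For context, ggml_completion wraps a POST to an OpenAI-style /v1/completions endpoint and returns the parsed JSON, or None on a request error. Below is a standalone sketch of the same pattern; the server URL and payload fields are assumptions, and unlike the diff (which posts only **kwargs) the prompt is placed in the JSON body explicitly:

```python
import requests

# Standalone sketch of the request pattern behind ggml_completion.
# /v1/completions comes from the diff; the payload fields, server URL,
# and explicit "prompt" key are assumptions for illustration.
def ggml_completion(base_url, prompt, **kwargs):
    try:
        response = requests.post(
            f"{base_url}/v1/completions",
            json={"prompt": prompt, **kwargs},
        )
        response.raise_for_status()
        return response.json()
    except requests.exceptions.RequestException as e:
        print(f"RequestException: {e}")
        return None

# Hypothetical call against a llama-cpp-python style completion server:
result = ggml_completion("http://localhost:8000",
                         "The capital of France is",
                         max_tokens=8, stop=["\n"])
if result and result.get("choices"):
    print(result["choices"][0]["text"])
```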
Test file

```diff
 import unittest
 from unittest.mock import MagicMock
-from lm_eval.models.llama import LlamaCppLM
+from lm_eval.models.ggml import GGMLLM
 
-class LlamaCppLMTest(unittest.TestCase):
+class GGMLLMTest(unittest.TestCase):
     def test_loglikelihood(self):
         base_url = "https://matthoffner-ggml-llm-api.hf.space"
-        lm = LlamaCppLM(base_url)
+        lm = GGMLLM(base_url)
 
-        # Create a MagicMock object to mock llama_completion
-        llama_completion_mock = MagicMock()
+        # Create a MagicMock object to mock ggml_completion
+        ggml_completion_mock = MagicMock()
 
         # Set the return value for the mocked function
-        llama_completion_mock.return_value = {
+        ggml_completion_mock.return_value = {
             "logprob": -1.2345,
             "is_greedy": True
         }
 
-        # Patch the llama_completion function with the mocked function
-        lm.llama_completion = llama_completion_mock
+        # Patch the ggml_completion function with the mocked function
+        lm.ggml_completion = ggml_completion_mock
 
         # Test loglikelihood
         requests = [("context1", "continuation1"), ("context2", "continuation2")]
@@ -29,16 +29,16 @@ class LlamaCppLMTest(unittest.TestCase):
     def test_greedy_until(self):
         base_url = "https://matthoffner-ggml-llm-api.hf.space"
-        lm = LlamaCppLM(base_url)
+        lm = GGMLLM(base_url)
 
-        # Define the llama_completion method with the desired behavior
-        def llama_completion_mock(url, context, stop=None):
+        # Define the ggml_completion method with the desired behavior
+        def ggml_completion_mock(url, context, stop=None):
             if stop is not None:
                 return {"text": f"generated_text{stop[-1]}"}
             return {"text": "generated_text"}
 
-        # Set the llama_completion method to the defined mock
-        lm.llama_completion = llama_completion_mock
+        # Set the ggml_completion method to the defined mock
+        lm.ggml_completion = ggml_completion_mock
 
         # Test greedy_until
         requests = [("input1", {"until": "stop1"}), ("input2", {"until": "stop2"})]
```
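Both tests stub the completion call by assigning a mock directly onto the instance, so nothing touches the real endpoint. A minimal sketch of the same pattern outside unittest, assuming greedy_until collects and returns the generated strings as the loop in the diff suggests:

```python
from unittest.mock import MagicMock
from lm_eval.models.ggml import GGMLLM

# Stub the instance attribute exactly as the tests do; the URL and the
# request shape are illustrative, and no network traffic occurs.
lm = GGMLLM("http://localhost:8000")
lm.ggml_completion = MagicMock(return_value={"text": "generated_text"})
print(lm.greedy_until([("input1", {"until": "stop1"})]))
```

One asymmetry visible in the diff is worth noting: greedy_until goes through self.ggml_completion, so instance-level stubbing intercepts it, while loglikelihood calls the module-level ggml_completion directly, which this style of stubbing would not catch; patching the module attribute (e.g. with unittest.mock.patch) would cover that path.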