Commit a5d57f15 authored by baberabb's avatar baberabb
Browse files

added tiktoken and changed openai dependency to optional

parent 8ad386eb
import os import os
import time import time
import transformers # type: ignore
from typing import List, Tuple from typing import List, Tuple
from tqdm import tqdm from tqdm import tqdm
from lm_eval import utils from lm_eval import utils
...@@ -41,7 +40,13 @@ def oa_completion(**kwargs): ...@@ -41,7 +40,13 @@ def oa_completion(**kwargs):
Retry with back-off until they respond Retry with back-off until they respond
""" """
import openai try:
import openai, tiktoken # noqa: E401
except ModuleNotFoundError:
raise Exception(
"attempted to use 'openai' LM type, but package `openai` or `tiktoken` are not installed. \
please install these via `pip install lm-eval[openai]` or `pip install -e .[openai]`",
)
backoff_time = 3 backoff_time = 3
while True: while True:
...@@ -73,28 +78,25 @@ class OpenaiCompletionsLM(LM): ...@@ -73,28 +78,25 @@ class OpenaiCompletionsLM(LM):
Truncate input if too long (if False and input is too long, throw error) Truncate input if too long (if False and input is too long, throw error)
""" """
super().__init__() super().__init__()
try:
import openai import openai, tiktoken # noqa: E401
except ModuleNotFoundError:
raise Exception(
"attempted to use 'openai' LM type, but package `openai` or `tiktoken` are not installed. \
please install these via `pip install lm-eval[openai]` or `pip install -e .[openai]`",
)
self.engine = engine self.engine = engine
self.tokenizer = transformers.GPT2TokenizerFast.from_pretrained("gpt2") self.tokenizer = tiktoken.encoding_for_model(self.engine)
self.vocab_size = self.tokenizer.n_vocab
self.vocab_size = self.tokenizer.vocab_size
# to make the annoying "Using pad_token, but it is not set yet." error go away
self.tokenizer.pad_token = "<|endoftext|>"
assert self.tokenizer.encode("hello\n\nhello") == [31373, 198, 198, 31373]
self.truncate = truncate self.truncate = truncate
self.end_of_text_token_id = self.tokenizer.convert_tokens_to_ids( self.end_of_text_token_id = self.tokenizer.eot_token
["<|endoftext|>"]
)[0]
# Read from environment variable OPENAI_API_SECRET_KEY # Read from environment variable OPENAI_API_SECRET_KEY
openai.api_key = os.environ["OPENAI_API_SECRET_KEY"] openai.api_key = os.environ["OPENAI_API_SECRET_KEY"]
@property
def eot_token_id(self):
    """Id of the end-of-text token for the model's tiktoken encoding."""
    eot = self.end_of_text_token_id
    return eot
@property @property
def max_length(self): def max_length(self):
...@@ -116,7 +118,7 @@ class OpenaiCompletionsLM(LM): ...@@ -116,7 +118,7 @@ class OpenaiCompletionsLM(LM):
raise NotImplementedError() raise NotImplementedError()
def tok_encode(self, string: str) -> List[int]:
    """Encode ``string`` into a list of token ids via the model's tokenizer."""
    encoded: List[int] = self.tokenizer.encode(string)
    return encoded
def tok_decode(self, tokens: List[int]) -> str:
    """Decode a list of token ids back into text via the model's tokenizer."""
    decoded = self.tokenizer.decode(tokens)
    return decoded
...@@ -236,12 +238,7 @@ class OpenaiCompletionsLM(LM): ...@@ -236,12 +238,7 @@ class OpenaiCompletionsLM(LM):
inp = context_enc[-(self.max_length - self.max_gen_toks) :] inp = context_enc[-(self.max_length - self.max_gen_toks) :]
inps.append(inp) inps.append(inp)
try: until = request_args.get("until", ["<|endoftext|>"])
until = request_args["until"][
0
] # TODO: does this handle a list of stop seqs correctly?
except KeyError:
until = "<|endoftext|>"
response = oa_completion( response = oa_completion(
engine=self.engine, engine=self.engine,
...@@ -255,7 +252,7 @@ class OpenaiCompletionsLM(LM): ...@@ -255,7 +252,7 @@ class OpenaiCompletionsLM(LM):
for resp, (context, args_) in zip(response.choices, chunk): for resp, (context, args_) in zip(response.choices, chunk):
s = resp["text"] s = resp["text"]
until_ = args_.get("until", []) until_ = args_.get("until", ["<|endoftext|>"])
for term in until_: for term in until_:
if len(term) > 0: if len(term) > 0:
......
...@@ -36,7 +36,6 @@ setuptools.setup( ...@@ -36,7 +36,6 @@ setuptools.setup(
"evaluate>=0.4.0", "evaluate>=0.4.0",
"jsonlines", "jsonlines",
"numexpr", "numexpr",
"openai>=0.6.4",
"omegaconf>=2.2", "omegaconf>=2.2",
"peft>=0.2.0", "peft>=0.2.0",
"pybind11>=2.6.2", "pybind11>=2.6.2",
...@@ -67,5 +66,6 @@ setuptools.setup( ...@@ -67,5 +66,6 @@ setuptools.setup(
], ],
"gptq": ["auto-gptq[triton] @ git+https://github.com/PanQiWei/AutoGPTQ"], "gptq": ["auto-gptq[triton] @ git+https://github.com/PanQiWei/AutoGPTQ"],
"anthropic": ["anthropic"], "anthropic": ["anthropic"],
"openai": ["openai", "tiktoken"],
}, },
) )
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment