gaoqiong / lm-evaluation-harness

Commit a5d57f15
Authored Aug 07, 2023 by baberabb
Parent 8ad386eb

    added tiktoken and changed openai dependency to optional

Showing 2 changed files with 22 additions and 25 deletions:

    lm_eval/models/openai_completions.py    +21 -24
    setup.py                                 +1  -1
lm_eval/models/openai_completions.py (view file @ a5d57f15)

 import os
 import time

-import transformers  # type: ignore
 from typing import List, Tuple

 from tqdm import tqdm

 from lm_eval import utils

...
@@ -41,7 +40,13 @@ def oa_completion(**kwargs):
     Retry with back-off until they respond
     """
-    import openai
+    try:
+        import openai, tiktoken  # noqa: E401
+    except ModuleNotFoundError:
+        raise Exception(
+            "attempted to use 'openai' LM type, but package `openai` or `tiktoken` are not installed. \
+please install these via `pip install lm-eval[openai]` or `pip install -e .[openai]`",
+        )

     backoff_time = 3
     while True:
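The body of the retry loop is collapsed in this view. For reference, a minimal sketch of the retry-with-back-off pattern the docstring describes, written against the pre-1.0 `openai` SDK (`openai.Completion.create`, `openai.error.OpenAIError`); the 1.5x growth factor and the bare except handling are illustrative, not taken from this diff:

    import time

    import openai


    def oa_completion_sketch(**kwargs):
        """Query the completions API, retrying with growing back-off on errors.

        Illustrative sketch only; not the harness's exact implementation.
        """
        backoff_time = 3
        while True:
            try:
                return openai.Completion.create(**kwargs)
            except openai.error.OpenAIError:
                # Wait, then increase the delay before the next attempt
                # (the growth factor is an assumption).
                time.sleep(backoff_time)
                backoff_time *= 1.5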
...

@@ -73,28 +78,25 @@ class OpenaiCompletionsLM(LM):
         Truncate input if too long (if False and input is too long, throw error)
         """
         super().__init__()

-        import openai
+        try:
+            import openai, tiktoken  # noqa: E401
+        except ModuleNotFoundError:
+            raise Exception(
+                "attempted to use 'openai' LM type, but package `openai` or `tiktoken` are not installed. \
+please install these via `pip install lm-eval[openai]` or `pip install -e .[openai]`",
+            )

         self.engine = engine
-        self.tokenizer = transformers.GPT2TokenizerFast.from_pretrained("gpt2")
-        self.vocab_size = self.tokenizer.vocab_size
-
-        # to make the annoying "Using pad_token, but it is not set yet." error go away
-        self.tokenizer.pad_token = "<|endoftext|>"
-        assert self.tokenizer.encode("hello\n\nhello") == [31373, 198, 198, 31373]
+        self.tokenizer = tiktoken.encoding_for_model(self.engine)
+        self.vocab_size = self.tokenizer.n_vocab
         self.truncate = truncate
-        self.end_of_text_token_id = self.tokenizer.convert_tokens_to_ids(["<|endoftext|>"])[0]
+        self.end_of_text_token_id = self.tokenizer.eot_token

         # Read from environment variable OPENAI_API_SECRET_KEY
         openai.api_key = os.environ["OPENAI_API_SECRET_KEY"]

     @property
     def eot_token_id(self):
-        return self.tokenizer.eos_token_id
+        return self.end_of_text_token_id

     @property
     def max_length(self):
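For reference, a small sketch of the tiktoken calls the constructor now relies on (`encoding_for_model`, `n_vocab`, `eot_token`, plus the `encode`/`decode` used elsewhere in the class); the engine name "davinci" is only an example:

    import tiktoken

    # Look up the encoding that matches an OpenAI model/engine name.
    enc = tiktoken.encoding_for_model("davinci")

    print(enc.n_vocab)         # size of the encoding's vocabulary
    print(enc.eot_token)       # id of the "<|endoftext|>" token
    tokens = enc.encode("hello\n\nhello")
    print(tokens)              # list of token ids
    print(enc.decode(tokens))  # round-trips back to the original string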
...

@@ -116,7 +118,7 @@ class OpenaiCompletionsLM(LM):
         raise NotImplementedError()

     def tok_encode(self, string: str) -> List[int]:
-        return self.tokenizer.encode(string, add_special_tokens=False)
+        return self.tokenizer.encode(string)

     def tok_decode(self, tokens: List[int]) -> str:
         return self.tokenizer.decode(tokens)
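One behavioural note on the new `encode` call (a property of tiktoken's API, not something this commit changes): unlike the Hugging Face tokenizer, which needed `add_special_tokens=False`, tiktoken's `Encoding.encode` rejects special-token strings by default and only emits them on explicit opt-in:

    import tiktoken

    enc = tiktoken.encoding_for_model("davinci")

    enc.encode("hello world")                            # plain text encodes normally
    # enc.encode("<|endoftext|>")                        # raises ValueError by default
    enc.encode("<|endoftext|>", allowed_special="all")   # opt-in: returns [enc.eot_token]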
...

@@ -236,12 +238,7 @@ class OpenaiCompletionsLM(LM):
                 inp = context_enc[-(self.max_length - self.max_gen_toks) :]
                 inps.append(inp)

-            try:
-                until = request_args["until"][0]
-                # TODO: does this handle a list of stop seqs correctly?
-            except KeyError:
-                until = "<|endoftext|>"
+            until = request_args.get("until", ["<|endoftext|>"])

             response = oa_completion(
                 engine=self.engine,
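The replacement keeps the entire list of stop sequences rather than only the first one (the removed TODO questioned exactly that), and the fallback is now the list ["<|endoftext|>"] instead of a bare string. A quick illustration with made-up request args:

    request_args = {"until": ["\n\n", "Q:"]}
    until = request_args.get("until", ["<|endoftext|>"])
    # until == ["\n\n", "Q:"]  -- every stop sequence is preserved

    request_args = {}
    until = request_args.get("until", ["<|endoftext|>"])
    # until == ["<|endoftext|>"]  -- default when the caller gives none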
...

@@ -255,7 +252,7 @@ class OpenaiCompletionsLM(LM):
             for resp, (context, args_) in zip(response.choices, chunk):
                 s = resp["text"]

-                until_ = args_.get("until", [])
+                until_ = args_.get("until", ["<|endoftext|>"])

                 for term in until_:
                     if len(term) > 0:
...

setup.py (view file @ a5d57f15)

...

@@ -36,7 +36,6 @@ setuptools.setup(
         "evaluate>=0.4.0",
         "jsonlines",
         "numexpr",
-        "openai>=0.6.4",
         "omegaconf>=2.2",
         "peft>=0.2.0",
         "pybind11>=2.6.2",
...

@@ -67,5 +66,6 @@ setuptools.setup(
         ],
         "gptq": ["auto-gptq[triton] @ git+https://github.com/PanQiWei/AutoGPTQ"],
         "anthropic": ["anthropic"],
-        "openai": ["openai"],
+        "openai": ["openai", "tiktoken"],
     },
 )
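Net effect of the two setup.py hunks: `openai` is no longer part of the base `install_requires`, and the `openai` extra now provides both `openai` and `tiktoken`, matching the install hint in the new error message (`pip install lm-eval[openai]` or `pip install -e .[openai]`).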