Commit a5d57f15 · gaoqiong/lm-evaluation-harness
Authored Aug 07, 2023 by baberabb
Parent: 8ad386eb

    added tiktoken and changed openai dependency to optional

Showing 2 changed files with 22 additions and 25 deletions:

    lm_eval/models/openai_completions.py  (+21, -24)
    setup.py                              (+1, -1)
lm_eval/models/openai_completions.py  (+21, -24)

@@ ... @@
 import os
 import time
-import transformers  # type: ignore
 from typing import List, Tuple

 from tqdm import tqdm

 from lm_eval import utils

@@ -41,7 +40,13 @@ def oa_completion(**kwargs):
     Retry with back-off until they respond
     """
-    import openai
+    try:
+        import openai, tiktoken  # noqa: E401
+    except ModuleNotFoundError:
+        raise Exception(
+            "attempted to use 'openai' LM type, but package `openai` or `tiktoken` are not installed. \
+please install these via `pip install lm-eval[openai]` or `pip install -e .[openai]`",
+        )

     backoff_time = 3
     while True:

@@ -73,28 +78,25 @@ class OpenaiCompletionsLM(LM):
         Truncate input if too long (if False and input is too long, throw error)
         """
         super().__init__()
-        import openai
+        try:
+            import openai, tiktoken  # noqa: E401
+        except ModuleNotFoundError:
+            raise Exception(
+                "attempted to use 'openai' LM type, but package `openai` or `tiktoken` are not installed. \
+please install these via `pip install lm-eval[openai]` or `pip install -e .[openai]`",
+            )

         self.engine = engine
-        self.tokenizer = transformers.GPT2TokenizerFast.from_pretrained("gpt2")
-        self.vocab_size = self.tokenizer.vocab_size
-        # to make the annoying "Using pad_token, but it is not set yet." error go away
-        self.tokenizer.pad_token = "<|endoftext|>"
-        assert self.tokenizer.encode("hello\n\nhello") == [31373, 198, 198, 31373]
+        self.tokenizer = tiktoken.encoding_for_model(self.engine)
+        self.vocab_size = self.tokenizer.n_vocab
         self.truncate = truncate
-        self.end_of_text_token_id = self.tokenizer.convert_tokens_to_ids(
-            ["<|endoftext|>"]
-        )[0]
+        self.end_of_text_token_id = self.tokenizer.eot_token

         # Read from environment variable OPENAI_API_SECRET_KEY
         openai.api_key = os.environ["OPENAI_API_SECRET_KEY"]

     @property
     def eot_token_id(self):
-        return self.tokenizer.eos_token_id
+        return self.end_of_text_token_id

     @property
     def max_length(self):

@@ -116,7 +118,7 @@ class OpenaiCompletionsLM(LM):
         raise NotImplementedError()

     def tok_encode(self, string: str) -> List[int]:
-        return self.tokenizer.encode(string, add_special_tokens=False)
+        return self.tokenizer.encode(string)

     def tok_decode(self, tokens: List[int]) -> str:
         return self.tokenizer.decode(tokens)

@@ -236,12 +238,7 @@ class OpenaiCompletionsLM(LM):
                 inp = context_enc[-(self.max_length - self.max_gen_toks) :]
                 inps.append(inp)

-            try:
-                until = request_args["until"][0]
-                # TODO: does this handle a list of stop seqs correctly?
-            except KeyError:
-                until = "<|endoftext|>"
+            until = request_args.get("until", ["<|endoftext|>"])
             response = oa_completion(
                 engine=self.engine,

@@ -255,7 +252,7 @@ class OpenaiCompletionsLM(LM):
             for resp, (context, args_) in zip(response.choices, chunk):
                 s = resp["text"]
-                until_ = args_.get("until", [])
+                until_ = args_.get("until", ["<|endoftext|>"])

                 for term in until_:
                     if len(term) > 0:
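For reference, a minimal sketch (not part of the commit) of the tiktoken calls the rewritten class now depends on. The engine name "davinci" below is only an illustration; any model name tiktoken can map to an encoding works, and unrecognised names raise a KeyError.

import tiktoken

# Stands in for the old transformers.GPT2TokenizerFast.from_pretrained("gpt2");
# the GPT-3 completion models share the GPT-2 BPE vocabulary.
enc = tiktoken.encoding_for_model("davinci")

tokens = enc.encode("hello\n\nhello")  # no add_special_tokens flag needed with tiktoken
print(tokens)                          # [31373, 198, 198, 31373], the ids the removed assert used to check
print(enc.decode(tokens))              # "hello\n\nhello"
print(enc.n_vocab)                     # vocabulary size, now assigned to self.vocab_size
print(enc.eot_token)                   # id of "<|endoftext|>", now assigned to self.end_of_text_token_id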
setup.py  (+1, -1)

@@ -36,7 +36,6 @@ setuptools.setup(
         "evaluate>=0.4.0",
         "jsonlines",
         "numexpr",
-        "openai>=0.6.4",
         "omegaconf>=2.2",
         "peft>=0.2.0",
         "pybind11>=2.6.2",

@@ -67,5 +66,6 @@ setuptools.setup(
         ],
         "gptq": ["auto-gptq[triton] @ git+https://github.com/PanQiWei/AutoGPTQ"],
         "anthropic": ["anthropic"],
+        "openai": ["openai", "tiktoken"],
     },
 )
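To make the packaging change concrete, a simplified sketch (not the project's actual setup.py) of how the new extra is wired: "openai" leaves install_requires, so a plain `pip install lm-eval` no longer pulls it in, while `pip install lm-eval[openai]` (or `pip install -e .[openai]`) installs both openai and tiktoken, which is what the guarded imports above expect.

import setuptools

setuptools.setup(
    name="lm_eval",   # illustrative metadata only
    version="0.0.0",
    install_requires=[
        "evaluate>=0.4.0",
        "jsonlines",
        "numexpr",
        # "openai>=0.6.4" no longer listed among the core requirements
    ],
    extras_require={
        "anthropic": ["anthropic"],
        "openai": ["openai", "tiktoken"],  # optional extra added by this commit
    },
)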