Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
lm-evaluation-harness
Commits
7f1d4b5a
Commit
7f1d4b5a
authored
Jan 02, 2024
by
Vicki Boykis
Browse files
local completions with echo=True
parent
4d10ad56
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
46 additions
and
11 deletions
+46
-11
lm_eval/models/openai_completions.py
lm_eval/models/openai_completions.py
+46
-11
No files found.
lm_eval/models/openai_completions.py
View file @
7f1d4b5a
import
copy
import
copy
import
os
from
collections
import
defaultdict
from
collections
import
defaultdict
from
importlib.util
import
find_spec
from
importlib.util
import
find_spec
from
typing
import
List
,
Optional
,
Tuple
from
typing
import
List
,
Literal
,
Optional
,
Tuple
import
transformers
from
tqdm
import
tqdm
from
tqdm
import
tqdm
from
lm_eval
import
utils
from
lm_eval
import
utils
...
@@ -69,24 +69,33 @@ def oa_completion(**kwargs):
...
@@ -69,24 +69,33 @@ def oa_completion(**kwargs):
return
completion
()
return
completion
()
@
register_model
(
"openai-completions"
)
@
register_model
(
"openai-completions"
,
"local-completions"
)
class
OpenaiCompletionsLM
(
LM
):
class
OpenaiCompletionsLM
(
LM
):
REQ_CHUNK_SIZE
=
20
REQ_CHUNK_SIZE
=
20
_DEFAULT_MAX_LENGTH
=
2048
_DEFAULT_MAX_LENGTH
=
2048
def
__init__
(
def
__init__
(
self
,
self
,
model
:
str
=
"text-davinci-003"
,
model
:
str
=
"gpt-3.5-turbo-instruct"
,
tokenizer_backend
:
Literal
[
"tiktoken"
,
"huggingface"
]
=
"tiktoken"
,
batch_size
=
1
,
base_url
:
str
=
None
,
truncate
:
bool
=
False
,
truncate
:
bool
=
False
,
max_gen_toks
:
int
=
256
,
max_gen_toks
:
int
=
256
,
batch_size
:
int
=
1
,
seed
:
int
=
1234
,
seed
:
int
=
1234
,
max_length
:
Optional
[
int
]
=
None
,
max_length
:
Optional
[
int
]
=
None
,
revision
:
Optional
[
str
]
=
"main"
,
trust_remote_code
:
Optional
[
bool
]
=
False
,
use_fast_tokenizer
:
Optional
[
bool
]
=
True
,
)
->
None
:
)
->
None
:
"""
"""
:param engine: str
:param model: str
OpenAI API engine (e.g. davinci)
Implements an OpenAI-style chat completion API for
accessing both OpenAI OR locally-hosted models using
HuggingFace Tokenizer
OpenAI API model (e.g. gpt-3.5-turbo)
using the **gen_kwargs passed on init
:param truncate: bool
:param truncate: bool
Truncate input if too long (if False and input is too long, throw error)
Truncate input if too long (if False and input is too long, throw error)
"""
"""
...
@@ -101,15 +110,41 @@ class OpenaiCompletionsLM(LM):
...
@@ -101,15 +110,41 @@ class OpenaiCompletionsLM(LM):
please install these via `pip install lm-eval[openai]` or `pip install -e .[openai]`"
,
please install these via `pip install lm-eval[openai]` or `pip install -e .[openai]`"
,
)
)
self
.
model
=
model
self
.
model
=
model
self
.
tokenizer
=
tiktoken
.
encoding_for_model
(
self
.
model
)
self
.
base_url
=
base_url
self
.
vocab_size
=
self
.
tokenizer
.
n_vocab
self
.
tokenizer_backend
=
tokenizer
_backend
self
.
truncate
=
truncate
self
.
truncate
=
truncate
self
.
end_of_text_token_id
=
self
.
tokenizer
.
eot_token
self
.
_max_gen_toks
=
max_gen_toks
self
.
_max_gen_toks
=
max_gen_toks
self
.
_max_length
=
max_length
self
.
_max_length
=
max_length
# if we have a local model, use HF tokenizer over tiktoken
if
self
.
tokenizer_backend
==
"huggingface"
:
self
.
revision
=
revision
self
.
trust_remote_code
=
trust_remote_code
self
.
use_fast_tokenizer
=
use_fast_tokenizer
self
.
tokenizer
=
transformers
.
AutoTokenizer
.
from_pretrained
(
self
.
model
,
revision
=
self
.
revision
,
trust_remote_code
=
self
.
trust_remote_code
,
use_fast_tokenizer
=
self
.
use_fast_tokenizer
,
)
self
.
vocab_size
=
self
.
tokenizer
.
vocab
self
.
end_of_text_token_id
=
self
.
tokenizer
.
eos_token
elif
self
.
tokenizer_backend
==
"tiktoken"
:
self
.
tokenizer
=
tiktoken
.
encoding_for_model
(
self
.
model
)
self
.
vocab_size
=
self
.
tokenizer
.
n_vocab
self
.
end_of_text_token_id
=
self
.
tokenizer
.
eot_token
else
:
raise
ValueError
(
f
"Expected tokenizer_backend to be one of ['tiktoken', 'huggingface'] but got
{
self
.
tokenizer_backend
}
"
)
# Read from environment variable OPENAI_API_KEY
# Read from environment variable OPENAI_API_KEY
openai
.
api_key
=
os
.
environ
[
"OPENAI_API_KEY"
]
# Set to EMPTY for local
if
self
.
base_url
:
self
.
client
=
openai
.
OpenAI
(
base_url
=
self
.
base_url
)
else
:
self
.
client
=
openai
.
OpenAI
()
# openai.AsyncOpenAI()
@
property
@
property
def
eot_token_id
(
self
):
def
eot_token_id
(
self
):
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment