lm-evaluation-harness · commit 7f24a08b

Refactor LM organization for more reuse

Authored Oct 11, 2021 by Leo Gao
Parent: e5066c69

Showing 5 changed files with 275 additions and 247 deletions
lm_eval/base.py             +62   -9
lm_eval/models/__init__.py   +1   -0
lm_eval/models/gpt2.py      +168  -180
lm_eval/models/gpt3.py      +43   -57
tests/test_gpt3.py           +1   -1
lm_eval/base.py
import abc
import random
from typing import Iterable
import numpy as np
import re
from tqdm import tqdm

from lm_eval.metrics import mean, perplexity, weighted_perplexity, weighted_mean
from lm_eval import utils


class LM(abc.ABC):

...
@@ -96,20 +99,70 @@ class LM(abc.ABC):
        pass

    @classmethod
-    def create_from_arg_string(cls, arg_string):
-        """Constructor method, in case models need additional arguments
-        e.g. OpenAI API engine, paths for loading, other params
-
-        :param arg_string: str
-            Left up to individual model class to handle
-        """
-        return cls()
+    def create_from_arg_string(cls, arg_string, additional_config={}):
+        args = utils.simple_parse_args_string(arg_string)
+        args2 = {k: v for k, v in additional_config.items() if v is not None}
+        return cls(**args, **args2)
+
+    def set_cache_hook(self, cache_hook):
+        self.cache_hook = cache_hook
+
+
+class TokenizedLM(LM):
+    @abc.abstractmethod
+    def tok_encode(self, string: str):
+        pass
+
+    @abc.abstractmethod
+    def tok_decode(self, tokens: Iterable[int]):
+        pass
+
+    @abc.abstractmethod
+    def _loglikelihood_tokens(self, requests, disable_tqdm=False):
+        pass
+
+    # subclass must implement properties vocab_size, eot_token_id, max_gen_toks.
+    # TODO: enforce this somehow
+
+    def loglikelihood(self, requests):
+        new_reqs = []
+        for context, continuation in requests:
+            if context == "":
+                # end of text as context
+                context_enc = [self.eot_token_id]
+            else:
+                context_enc = self.tok_encode(context)
+
+            continuation_enc = self.tok_encode(continuation)
+
+            new_reqs.append(((context, continuation), context_enc, continuation_enc))
+
+        return self._loglikelihood_tokens(new_reqs)
+
+    def loglikelihood_rolling(self, requests):
+        # TODO: Implement caching once we've confirmed the perplexity implementation
+        # TODO: automatic batch size detection for vectorization
+        loglikelihoods = []
+        for string, in tqdm(requests):
+            rolling_token_windows = list(map(utils.make_disjoint_window, utils.get_rolling_token_windows(
+                token_list=self.tok_encode(string),
+                prefix_token=self.eot_token_id,
+                max_seq_len=self.max_length,
+                context_len=1,
+            )))
+
+            rolling_token_windows = [(None,) + x for x in rolling_token_windows]
+
+            # TODO: extract out this call so it only gets called once and also somehow figure out partial caching for that
+            string_nll = self._loglikelihood_tokens(rolling_token_windows, disable_tqdm=True)
+
+            # discard is_greedy
+            string_nll = [x[0] for x in string_nll]
+
+            string_nll = sum(string_nll)
+            loglikelihoods.append(string_nll)
+
+        return loglikelihoods


class Task(abc.ABC):
    """A task represents an entire benchmark including its dataset, problems,
    answers, and evaluation methods. See BoolQ for a simple example implementation

...
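The new TokenizedLM layer pulls the text-to-token plumbing (loglikelihood and the rolling-window perplexity loop) out of the individual backends, so a backend only has to supply tokenization, a token-level scorer, and a few size attributes. A minimal sketch of a conforming subclass, using a toy byte-level tokenizer; everything below is hypothetical illustration, not code from this commit:

# Hypothetical sketch (not part of the commit): a minimal TokenizedLM subclass.
# Only the required interface (tok_encode, tok_decode, _loglikelihood_tokens, plus the
# vocab_size / eot_token_id / max_gen_toks / max_length attributes) comes from base.py.
from lm_eval.base import TokenizedLM


class CharDummyLM(TokenizedLM):
    vocab_size = 256
    eot_token_id = 0
    max_gen_toks = 16
    max_length = 128

    def tok_encode(self, string: str):
        # toy "tokenizer": one token per byte
        return list(string.encode("utf-8"))

    def tok_decode(self, tokens):
        return bytes(tokens).decode("utf-8", errors="ignore")

    def _loglikelihood_tokens(self, requests, disable_tqdm=False):
        # requests are ((context, continuation), context_enc, continuation_enc) triples;
        # a real backend would score continuation_enc under the model here
        return [(-1.0 * len(cont_enc), False) for _, _, cont_enc in requests]

    def greedy_until(self, requests):
        # still required by the LM interface; a real backend would generate text here
        return ["" for _ in requests]

Because create_from_arg_string now lives on LM itself, such a class also gets CLI-style construction (a "key=value,..." string merged with an additional_config dict) without any per-backend boilerplate.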
lm_eval/models/__init__.py

...
@@ -3,6 +3,7 @@ from . import gpt3
from . import dummy

MODEL_REGISTRY = {
+    "hf": gpt2.HFLM,
    "gpt2": gpt2.GPT2LM,
    "gpt3": gpt3.GPT3LM,
    "dummy": dummy.DummyLM,

...
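The only change here registers the generalized Hugging Face backend under the new "hf" key; "gpt2" keeps working because GPT2LM is aliased to HFLM at the bottom of gpt2.py. A hedged sketch of how the registry is typically consumed (the exact call site in the evaluator is not part of this diff, and constructing the model will download the checkpoint):

# Hypothetical usage sketch: resolving a model name and arg string through the registry.
from lm_eval import models

lm_cls = models.MODEL_REGISTRY["hf"]                  # same class as "gpt2" after this commit
lm = lm_cls.create_from_arg_string(
    "pretrained=gpt2,device=cuda",                    # parsed by utils.simple_parse_args_string
    additional_config={"batch_size": 4},              # None values are dropped before **-merging
)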
lm_eval/models/gpt2.py

...
@@ -2,115 +2,36 @@ import transformers
import torch
import torch.nn as nn
import torch.nn.functional as F
-from lm_eval.base import LM
+from lm_eval.base import LM, TokenizedLM
from lm_eval import utils
from tqdm import tqdm
import numpy as np
+from abc import ABC, abstractmethod
+from typing import Iterable


-class GPT2LM(LM):
-    MAX_GEN_TOKS = 256
+class TorchLM(TokenizedLM):
+    @abstractmethod
+    def _model_generate(self, context, max_length, eos_token_id):
+        pass

-    def __init__(self, device='cuda', pretrained='gpt2', revision='main', subfolder=None, tokenizer=None, batch_size=1):
-        super().__init__()
-
-        assert isinstance(device, str)
-        assert isinstance(pretrained, str)
-        assert isinstance(batch_size, int)
-
-        if device:
-            self.device = torch.device(device)
-        else:
-            self.device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
-
-        # TODO: update this to be less of a hack once subfolder is fixed in HF
-        self.gpt2 = transformers.AutoModelForCausalLM.from_pretrained(
-            pretrained, revision=revision + ("/" + subfolder if subfolder is not None else "")).to(self.device)
-        self.gpt2.eval()
-
-        # pretrained tokenizer for neo is broken for now so just hardcoding this to gpt2
-        self.tokenizer = transformers.AutoTokenizer.from_pretrained(
-            pretrained if tokenizer is None else tokenizer, revision=revision, subfolder=subfolder)
-        assert isinstance(self.tokenizer, (
-            transformers.GPT2Tokenizer, transformers.GPT2TokenizerFast,
-            transformers.T5Tokenizer, transformers.T5TokenizerFast,
-        )), "this tokenizer has not been checked for compatibility yet!"
-
-        self.VOCAB_SIZE = self.tokenizer.vocab_size
-        self.EOT_TOKEN_ID = self.tokenizer.eos_token_id
-        print(self.EOT_TOKEN_ID)
-
-        try:
-            self.max_length = self.gpt2.config.n_ctx
-        except AttributeError:
-            # gptneoconfig doesn't have n_ctx apparently
-            self.max_length = self.gpt2.config.max_position_embeddings
-
-        if isinstance(self.tokenizer, (transformers.GPT2Tokenizer, transformers.GPT2TokenizerFast)):
-            assert self.tokenizer.encode('hello\n\nhello') == [31373, 198, 198, 31373]
-
-        # multithreading and batching
-        gpus = torch.cuda.device_count()
-        batch_size_per_gpu = batch_size  # todo: adaptive batch size
-
-        # TODO: fix multi-gpu
-        self.batch_size = batch_size_per_gpu  # * gpus
-
-        # TODO: fix multi-gpu
-        # if gpus > 1:
-        #     self.gpt2 = nn.DataParallel(self.gpt2)
-
-    @classmethod
-    def create_from_arg_string(cls, arg_string, additional_config={}):
-        args = utils.simple_parse_args_string(arg_string)
-        args2 = {k: v for k, v in additional_config.items() if v is not None}
-        return cls(**args, **args2)
-
-    def loglikelihood(self, requests):
-        new_reqs = []
-        for context, continuation in requests:
-            if context == "":
-                # end of text as context
-                context_enc = [self.EOT_TOKEN_ID]
-            else:
-                context_enc = self.tokenizer.encode(context, add_special_tokens=False)
-
-            continuation_enc = self.tokenizer.encode(continuation, add_special_tokens=False)
-
-            new_reqs.append(((context, continuation), context_enc, continuation_enc))
-
-        return self._loglikelihood_tokens(new_reqs)
-
-    def loglikelihood_rolling(self, requests):
-        # TODO: Implement caching once we've confirmed the perplexity implementation
-        # TODO: automatic batch size detection for vectorization
-        loglikelihoods = []
-        with torch.no_grad():
-            for string, in tqdm(requests):
-                rolling_token_windows = list(map(utils.make_disjoint_window, utils.get_rolling_token_windows(
-                    token_list=self.tokenizer.encode(string, add_special_tokens=False),
-                    prefix_token=self.EOT_TOKEN_ID,
-                    max_seq_len=self.max_length,
-                    context_len=1,
-                )))
-
-                rolling_token_windows = [(None,) + x for x in rolling_token_windows]
-
-                # TODO: extract out this call so it only gets called once and also somehow figure out partial caching for that
-                string_nll = self._loglikelihood_tokens(rolling_token_windows, disable_tqdm=True)
-
-                # discard is_greedy
-                string_nll = [x[0] for x in string_nll]
-
-                string_nll = sum(string_nll)
-                loglikelihoods.append(string_nll)
-
-        return loglikelihoods
+    @abstractmethod
+    def _model_call(self, inps):
+        """
+        inps: a torch tensor of shape [batch, sequence]
+        the size of sequence may vary from call to call
+
+        returns: a torch tensor of shape [batch, sequence, vocab] with the
+        logits returned from the model
+        """
+        pass
+
+    # subclass must implement properties batch_size, vocab_size, eot_token_id, max_gen_toks, device.
+    # TODO: enforce this somehow

    def _loglikelihood_tokens(self, requests, disable_tqdm=False):
        # TODO: implement some kind of efficient-request-middleware that lumps together requests with the same context
        res = []
        with torch.no_grad():

            def _collate(x):
                # the negative sign on len(toks) sorts descending - this has a few advantages:

...
@@ -145,7 +66,7 @@ class GPT2LM(LM):
            # CTX      CONT
            # inp    0 1 2 3|4 5 6 7 8 9   <- last token is deleted by inp[:, :-1]
            # gpt2    \               \
-            # logits   1 2 3|4 5 6 7 8 9   <- the ctx half gets tossed out by the [:, -len(continuation_enc):, :self.VOCAB_SIZE] slice
+            # logits   1 2 3|4 5 6 7 8 9   <- the ctx half gets tossed out by the [:, -len(continuation_enc):, :self.vocab_size] slice
            # cont_toks      4 5 6 7 8 9

            # when too long to fit in context, truncate from the left

...
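The ASCII diagram above (unchanged apart from the VOCAB_SIZE → vocab_size rename) describes how a continuation's log-likelihood is read out of the causal LM's logits. A standalone sketch of that readout, assuming an HF-style model whose first output is the logits tensor; this paraphrases the idea rather than reproducing the harness's batched implementation:

# Hypothetical sketch of the scoring step the comment describes (not the harness's exact code).
import torch
import torch.nn.functional as F

def score_continuation(model, context_enc, continuation_enc, vocab_size):
    inp = torch.tensor([context_enc + continuation_enc])           # [1, ctx+cont]
    logits = model(inp[:, :-1])[0][:, :, :vocab_size]              # last input token dropped
    logprobs = F.log_softmax(logits, dim=-1)[:, -len(continuation_enc):, :]
    cont_toks = torch.tensor([continuation_enc])                   # [1, cont]
    greedy = logprobs.argmax(dim=-1).eq(cont_toks).all()           # is_greedy check
    ll = torch.gather(logprobs, 2, cont_toks.unsqueeze(-1)).sum()  # sum of per-token logprobs
    return float(ll), bool(greedy)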
@@ -197,23 +118,15 @@ class GPT2LM(LM):

        return reord.get_original(res)

-    def _model_call(self, inps):
-        """
-        inps: a torch tensor of shape [batch, sequence]
-        the size of sequence may vary from call to call
-
-        returns: a torch tensor of shape [batch, sequence, vocab] with the
-        logits returned from the model
-        """
-        return self.gpt2(inps)[0][:, :, :50257]
-
    def greedy_until(self, requests):
        # TODO: implement fully general `until` that handles untils that are
        # multiple tokens or that span multiple tokens correctly
+        # TODO: extract to TokenizedLM?

        res = []

        def _collate(x):
-            toks = self.tokenizer.encode(x[0], add_special_tokens=False)
+            toks = self.tok_encode(x[0])
            return (len(toks), x[0])

        reord = utils.Reorderer(requests, _collate)

...
@@ -221,18 +134,13 @@ class GPT2LM(LM):
        for context, until in tqdm(reord.get_reordered()):
            if isinstance(until, str):
                until = [until]

-            context_enc = torch.tensor([self.tokenizer.encode(context, add_special_tokens=False)[self.MAX_GEN_TOKS - self.max_length:]]).to(self.device)
-
-            primary_until, = self.tokenizer.encode(until[0], add_special_tokens=False)
-
-            cont = self.gpt2.generate(
-                context_enc,
-                max_length=context_enc.shape[1] + self.MAX_GEN_TOKS,
-                eos_token_id=primary_until,
-                do_sample=False
-            )
-
-            s = self.tokenizer.decode(cont[0].tolist()[context_enc.shape[1]:])
+            primary_until, = self.tok_encode(until[0])
+
+            context_enc = torch.tensor([self.tok_encode(context)[self.max_gen_toks - self.max_length:]]).to(self.device)
+
+            cont = self._model_generate(context_enc, context_enc.shape[1] + self.max_gen_toks, primary_until)
+
+            s = self.tok_decode(cont[0].tolist()[context_enc.shape[1]:])

            for term in until:
                s = s.split(term)[0]

...
@@ -243,3 +151,83 @@ class GPT2LM(LM):
            res.append(s)

        return reord.get_original(res)
+
+
+class HFLM(TorchLM):
+    def __init__(self, device='cuda', pretrained='gpt2', revision='main', subfolder=None, tokenizer=None, batch_size=1):
+        super().__init__()
+
+        assert isinstance(device, str)
+        assert isinstance(pretrained, str)
+        assert isinstance(batch_size, int)
+
+        if device:
+            self.device = torch.device(device)
+        else:
+            self.device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
+
+        # TODO: update this to be less of a hack once subfolder is fixed in HF
+        self.gpt2 = transformers.AutoModelForCausalLM.from_pretrained(
+            pretrained, revision=revision + ("/" + subfolder if subfolder is not None else "")).to(self.device)
+        self.gpt2.eval()
+
+        # pretrained tokenizer for neo is broken for now so just hardcoding this to gpt2
+        self.tokenizer = transformers.AutoTokenizer.from_pretrained(
+            pretrained if tokenizer is None else tokenizer, revision=revision, subfolder=subfolder)
+        assert isinstance(self.tokenizer, (
+            transformers.GPT2Tokenizer, transformers.GPT2TokenizerFast,
+            transformers.T5Tokenizer, transformers.T5TokenizerFast,
+        )), "this tokenizer has not been checked for compatibility yet!"
+
+        self.vocab_size = self.tokenizer.vocab_size
+        self.eot_token_id = self.tokenizer.eos_token_id  # we use EOT because end of *text* is more accurate for what we're doing than end of *sentence*
+
+        self.max_gen_toks = 256
+
+        try:
+            self.max_length = self.gpt2.config.n_ctx
+        except AttributeError:
+            # gptneoconfig doesn't have n_ctx apparently
+            self.max_length = self.gpt2.config.max_position_embeddings
+
+        if isinstance(self.tokenizer, (transformers.GPT2Tokenizer, transformers.GPT2TokenizerFast)):
+            assert self.tokenizer.encode('hello\n\nhello') == [31373, 198, 198, 31373], \
+                self.tokenizer.encode('hello\n\nhello')
+
+        # multithreading and batching
+        gpus = torch.cuda.device_count()
+        batch_size_per_gpu = batch_size  # todo: adaptive batch size
+
+        # TODO: fix multi-gpu
+        self.batch_size = batch_size_per_gpu  # * gpus
+
+        # TODO: fix multi-gpu
+        # if gpus > 1:
+        #     self.gpt2 = nn.DataParallel(self.gpt2)
+
+    def tok_encode(self, string: str):
+        return self.tokenizer.encode(string, add_special_tokens=False)
+
+    def tok_decode(self, tokens):
+        return self.tokenizer.decode(tokens)
+
+    def _model_call(self, inps):
+        """
+        inps: a torch tensor of shape [batch, sequence]
+        the size of sequence may vary from call to call
+
+        returns: a torch tensor of shape [batch, sequence, vocab] with the
+        logits returned from the model
+        """
+        with torch.no_grad():
+            return self.gpt2(inps)[0][:, :, :50257]
+
+    def _model_generate(self, context, max_length, eos_token_id):
+        return self.gpt2.generate(
+            context,
+            max_length=max_length,
+            eos_token_id=eos_token_id,
+            do_sample=False
+        )
+
+
+# for backwards compatibility
+GPT2LM = HFLM
\ No newline at end of file
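With generation factored into _model_generate and the forward pass into _model_call, a new PyTorch backend only needs the hooks below; batching, request reordering, rolling windows and truncation are inherited from TorchLM. A hypothetical second backend as a sketch (the class name and defaults are invented, not part of this commit):

# Hypothetical sketch (not in this commit): wrapping another causal HF checkpoint as a TorchLM backend.
import torch
import transformers
from lm_eval.models.gpt2 import TorchLM


class OtherCausalLM(TorchLM):
    def __init__(self, pretrained="distilgpt2", device="cpu", batch_size=1):
        super().__init__()
        self.device = torch.device(device)
        self.model = transformers.AutoModelForCausalLM.from_pretrained(pretrained).to(self.device)
        self.model.eval()
        self.tokenizer = transformers.AutoTokenizer.from_pretrained(pretrained)

        # attributes TorchLM's shared machinery expects
        self.vocab_size = self.tokenizer.vocab_size
        self.eot_token_id = self.tokenizer.eos_token_id
        self.max_gen_toks = 256
        self.batch_size = batch_size
        try:
            self.max_length = self.model.config.n_ctx
        except AttributeError:
            self.max_length = self.model.config.max_position_embeddings

    def tok_encode(self, string: str):
        return self.tokenizer.encode(string, add_special_tokens=False)

    def tok_decode(self, tokens):
        return self.tokenizer.decode(tokens)

    def _model_call(self, inps):
        with torch.no_grad():
            return self.model(inps)[0][:, :, :self.vocab_size]

    def _model_generate(self, context, max_length, eos_token_id):
        return self.model.generate(context, max_length=max_length,
                                   eos_token_id=eos_token_id, do_sample=False)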
lm_eval/models/gpt3.py
import os
import numpy as np
import transformers
-from lm_eval.base import LM
+from lm_eval.base import LM, TokenizedLM
from lm_eval import utils
from tqdm import tqdm
import time

...
@@ -35,11 +35,8 @@ def oa_completion(**kwargs):
            backoff_time *= 1.5


-class GPT3LM(LM):
-    MAX_LENGTH = 2048
+class GPT3LM(TokenizedLM):
    REQ_CHUNK_SIZE = 20
-    MAX_GEN_TOKS = 256

    def __init__(self, engine, truncate=False):
        """

...
@@ -50,10 +47,15 @@ class GPT3LM(LM):
            Truncate input if too long (if False and input is too long, throw error)
        """
        super().__init__()

        import openai
        self.engine = engine
        self.tokenizer = transformers.GPT2TokenizerFast.from_pretrained('gpt2')

+        self.vocab_size = self.tokenizer.vocab_size
+        self.eot_token_id = self.tokenizer.eos_token_id
+        self.max_gen_toks = 256
+        self.max_length = 2048
+
        # to make the annoying "Using pad_token, but it is not set yet." error go away
        self.tokenizer.pad_token = "<|endoftext|>"

...
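max_length and max_gen_toks are now instance attributes set in __init__ rather than the MAX_LENGTH / MAX_GEN_TOKS class constants, but construction is unchanged and typically goes through the inherited create_from_arg_string. A usage sketch; the engine name is illustrative, the openai package must be installed, and the tokenizer download needs network access:

# Hypothetical usage sketch: constructing the OpenAI-backed LM from an arg string.
import os
os.environ.setdefault("OPENAI_API_SECRET_KEY", "sk-...")  # placeholder; __init__ reads this env var

from lm_eval.models.gpt3 import GPT3LM

gpt3 = GPT3LM.create_from_arg_string("engine=davinci")
print(gpt3.max_length, gpt3.max_gen_toks)  # 2048 256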
@@ -64,26 +66,11 @@ class GPT3LM(LM):
            # Read from environment variable OPENAI_API_SECRET_KEY
            openai.api_key = os.environ["OPENAI_API_SECRET_KEY"]

-    @classmethod
-    def create_from_arg_string(cls, arg_string, additional_config={}):
-        args = utils.simple_parse_args_string(arg_string)
-        args2 = {k: v for k, v in additional_config.items() if v is not None}
-        return cls(**args, **args2)
-
-    def loglikelihood(self, requests):
-        new_reqs = []
-        for context, continuation in requests:
-            if context == "":
-                # end of text as context
-                context_enc = [50256]
-            else:
-                context_enc = self.tokenizer.encode(context)
-
-            continuation_enc = self.tokenizer.encode(continuation)
-
-            new_reqs.append(((context, continuation), context_enc, continuation_enc))
-
-        return self._loglikelihood_tokens(new_reqs)
+    def tok_encode(self, string: str):
+        return self.tokenizer.encode(string, add_special_tokens=False)
+
+    def tok_decode(self, tokens):
+        return self.tokenizer.decode(tokens)

    def loglikelihood_rolling(self, requests):
        # TODO: switch implementation to use _loglikelihood_tokens rather than having it do its own thing

...
@@ -94,7 +81,7 @@ class GPT3LM(LM):
            rolling_token_windows = utils.get_rolling_token_windows(
                token_list=encoded,
                prefix_token=self.end_of_text_token_id,
-                max_seq_len=self.MAX_LENGTH,
+                max_seq_len=self.max_length,
                context_len=1,
            )

            string_loglikelihoods = []

...
@@ -109,8 +96,28 @@ class GPT3LM(LM):

        return loglikelihoods

-    def _loglikelihood_tokens(self, requests):
-        import openai
+    def get_token_logprobs(self, input_tokens, pred_tokens):
+        pred_start = len(input_tokens) - len(pred_tokens) + 1
+        # We're going to stitch together the input_tokens and pred_tokens
+        # In the longest case, this gets us to length = max_seq_len+1 (which the API works with)
+        assert input_tokens[pred_start:] == pred_tokens[:-1]
+        token_ids = input_tokens + [pred_tokens[-1]]
+        response = oa_completion(
+            engine=self.engine,
+            prompt=token_ids,
+            max_tokens=0,
+            temperature=0.0,
+            logprobs=0,
+            echo=True,
+        )
+        logprobs = np.array(response["choices"][0]["logprobs"]["token_logprobs"][pred_start:])
+        positions = np.arange(pred_start - 1, pred_start - 1 + len(token_ids[pred_start:]))
+        return {
+            "logprobs": logprobs,
+            "positions": positions,
+        }
+
+    def _loglikelihood_tokens(self, requests, disable_tqdm=False):
        res = []

        def _collate(x):

...
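get_token_logprobs (moved up next to the new _loglikelihood_tokens signature) relies on the Completions API's echo=True with max_tokens=0: the API generates nothing and instead returns per-token logprobs for the prompt itself. A hedged sketch of scoring a continuation that way, reusing the oa_completion retry wrapper defined earlier in this file; the helper name below is invented:

# Hypothetical sketch of the echo=True trick used above.
def continuation_logprob(engine, context_tokens, continuation_tokens):
    token_ids = context_tokens + continuation_tokens
    response = oa_completion(
        engine=engine,
        prompt=token_ids,
        max_tokens=0,          # don't sample anything new
        temperature=0.0,
        logprobs=0,
        echo=True,             # return logprobs for the prompt tokens themselves
    )
    token_logprobs = response["choices"][0]["logprobs"]["token_logprobs"]
    # the first entry is None (no logprob for the very first token); the continuation
    # occupies the last len(continuation_tokens) positions
    return sum(token_logprobs[-len(continuation_tokens):])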
@@ -122,12 +129,12 @@ class GPT3LM(LM):

        reord = utils.Reorderer(requests, _collate)

-        for chunk in tqdm(list(utils.chunks(reord.get_reordered(), self.REQ_CHUNK_SIZE))):
+        for chunk in tqdm(list(utils.chunks(reord.get_reordered(), self.REQ_CHUNK_SIZE)), disable=disable_tqdm):
            inps = []
            ctxlens = []
            for cache_key, context_enc, continuation_enc in chunk:
-                inp = (context_enc + continuation_enc)[-self.MAX_LENGTH:]
-                ctxlen = len(context_enc) - max(0, len(context_enc) + len(continuation_enc) - self.MAX_LENGTH)
+                inp = (context_enc + continuation_enc)[-self.max_length:]
+                ctxlen = len(context_enc) - max(0, len(context_enc) + len(continuation_enc) - self.max_length)

                inps.append(inp)
                ctxlens.append(ctxlen)

...
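The ctxlen arithmetic above decides how many of the surviving tokens still count as context once the request is left-truncated to max_length, so that every continuation token is still scored. A small worked example with made-up lengths:

# Worked example of the truncation arithmetic above (numbers are made up):
# context of 2040 tokens + continuation of 20 tokens, max_length = 2048.
context_len, continuation_len, max_length = 2040, 20, 2048
inp_len = min(context_len + continuation_len, max_length)                  # 2048 tokens survive
ctxlen = context_len - max(0, context_len + continuation_len - max_length)
# 2040 - max(0, 2060 - 2048) = 2028: the 12 overflow tokens are all charged to the context,
# so every continuation token is still scored.
print(inp_len, ctxlen)  # 2048 2028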
@@ -151,34 +158,13 @@ class GPT3LM(LM):

        return reord.get_original(res)

-    def get_token_logprobs(self, input_tokens, pred_tokens):
-        pred_start = len(input_tokens) - len(pred_tokens) + 1
-        # We're going to stitch together the input_tokens and pred_tokens
-        # In the longest case, this gets us to length = max_seq_len+1 (which the API works with)
-        assert input_tokens[pred_start:] == pred_tokens[:-1]
-        token_ids = input_tokens + [pred_tokens[-1]]
-        response = oa_completion(
-            engine=self.engine,
-            prompt=token_ids,
-            max_tokens=0,
-            temperature=0.0,
-            logprobs=0,
-            echo=True,
-        )
-        logprobs = np.array(response["choices"][0]["logprobs"]["token_logprobs"][pred_start:])
-        positions = np.arange(pred_start - 1, pred_start - 1 + len(token_ids[pred_start:]))
-        return {
-            "logprobs": logprobs,
-            "positions": positions,
-        }
-
    def greedy_until(self, requests):
        if not requests:
            return []
        import openai

        res = []

        def _collate(x):
-            toks = self.tokenizer.encode(x[0])
+            toks = self.tok_encode(x[0])
            return (len(toks), x[0])

        reord = utils.Reorderer(requests, _collate)

...
@@ -199,14 +185,14 @@ class GPT3LM(LM):
        for chunk, until in tqdm(list(sameuntil_chunks(reord.get_reordered(), self.REQ_CHUNK_SIZE))):
            inps = []
            for context, _ in chunk:
-                context_enc = self.tokenizer.encode(context)
-                inp = context_enc[-(self.MAX_LENGTH - self.MAX_GEN_TOKS):]
+                context_enc = self.tok_encode(context)
+                inp = context_enc[-(self.max_length - self.max_gen_toks):]
                inps.append(inp)

            response = oa_completion(
                engine=self.engine,
                prompt=inps,
-                max_tokens=self.MAX_GEN_TOKS,
+                max_tokens=self.max_gen_toks,
                temperature=0.,
                logprobs=10,
                stop=until

...
tests/test_gpt3.py
...

@@ -85,7 +85,7 @@ def test_gpt3_perplexity():
    assert perplexity == pytest.approx(tgt, rel=1e-3)

    # Hack: modify gpt3 to have shorter context length to induce rolling windows
-    gpt3.MAX_LENGTH = 5
+    gpt3.max_length = 5
    perplexity = gpt3.loglikelihood_rolling([(test_string,)])[0]
    tgt = -101.93490880000002
    assert perplexity == pytest.approx(tgt, rel=1e-3)