Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
lm-evaluation-harness
Commits
62976337
Commit
62976337
authored
Apr 04, 2022
by
soqeue1
Browse files
fix: remove assert
parent
2987beb0
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
9 additions
and
9 deletions
+9
-9
lm_eval/models/gpt2.py
lm_eval/models/gpt2.py
+9
-9
No files found.
lm_eval/models/gpt2.py
View file @
62976337
...
@@ -27,16 +27,16 @@ class HFLM(BaseLM):
...
@@ -27,16 +27,16 @@ class HFLM(BaseLM):
self
.
tokenizer
=
transformers
.
AutoTokenizer
.
from_pretrained
(
self
.
tokenizer
=
transformers
.
AutoTokenizer
.
from_pretrained
(
pretrained
if
tokenizer
is
None
else
tokenizer
,
revision
=
revision
,
subfolder
=
subfolder
)
pretrained
if
tokenizer
is
None
else
tokenizer
,
revision
=
revision
,
subfolder
=
subfolder
)
assert
isinstance
(
self
.
tokenizer
,
(
#
assert isinstance(self.tokenizer, (
transformers
.
GPT2Tokenizer
,
transformers
.
GPT2TokenizerFast
,
#
transformers.GPT2Tokenizer, transformers.GPT2TokenizerFast,
transformers
.
T5Tokenizer
,
transformers
.
T5TokenizerFast
,
#
transformers.T5Tokenizer, transformers.T5TokenizerFast,
)),
"this tokenizer has not been checked for compatibility yet!"
#
)), "this tokenizer has not been checked for compatibility yet!"
self
.
vocab_size
=
self
.
tokenizer
.
vocab_size
self
.
vocab_size
=
self
.
tokenizer
.
vocab_size
if
isinstance
(
self
.
tokenizer
,
(
transformers
.
GPT2Tokenizer
,
transformers
.
GPT2TokenizerFast
)):
#
if isinstance(self.tokenizer, (transformers.GPT2Tokenizer, transformers.GPT2TokenizerFast)):
assert
self
.
tokenizer
.
encode
(
'hello
\n\n
hello'
)
==
[
31373
,
198
,
198
,
31373
],
\
#
assert self.tokenizer.encode('hello\n\nhello') == [31373, 198, 198, 31373], \
self
.
tokenizer
.
encode
(
'hello
\n\n
hello'
)
#
self.tokenizer.encode('hello\n\nhello')
# multithreading and batching
# multithreading and batching
self
.
batch_size_per_gpu
=
batch_size
# todo: adaptive batch size
self
.
batch_size_per_gpu
=
batch_size
# todo: adaptive batch size
...
@@ -75,7 +75,7 @@ class HFLM(BaseLM):
...
@@ -75,7 +75,7 @@ class HFLM(BaseLM):
def tok_encode(self, string: str):
    """Encode *string* into a list of token ids.

    Special tokens (BOS/EOS etc.) are deliberately suppressed via
    ``add_special_tokens=False``; the evaluation harness manages context
    boundaries itself, so the raw text must map to raw ids only.
    """
    encoded = self.tokenizer.encode(string, add_special_tokens=False)
    return encoded
def tok_decode(self, tokens):
    """Inverse of ``tok_encode``: render a sequence of token ids as text."""
    decoder = self.tokenizer.decode
    return decoder(tokens)
...
@@ -89,7 +89,7 @@ class HFLM(BaseLM):
...
@@ -89,7 +89,7 @@ class HFLM(BaseLM):
"""
"""
with
torch
.
no_grad
():
with
torch
.
no_grad
():
return
self
.
gpt2
(
inps
)[
0
][:,
:,
:
50257
]
return
self
.
gpt2
(
inps
)[
0
][:,
:,
:
50257
]
def
_model_generate
(
self
,
context
,
max_length
,
eos_token_id
):
def
_model_generate
(
self
,
context
,
max_length
,
eos_token_id
):
return
self
.
gpt2
.
generate
(
return
self
.
gpt2
.
generate
(
context
,
context
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment