Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
lm-evaluation-harness
Commits
fbea4311
"vscode:/vscode.git/clone" did not exist on "0375a63b548b77720c175152ba43c896b03f71b8"
Commit
fbea4311
authored
May 21, 2023
by
jon-tow
Browse files
fix: remove tokenizer constraint in `gpt2`
parent
2d843472
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
18 additions
and
18 deletions
+18
-18
lm_eval/models/gpt2.py
lm_eval/models/gpt2.py
+18
-18
No files found.
lm_eval/models/gpt2.py
100644 → 100755
View file @
fbea4311
...
@@ -47,27 +47,27 @@ class HFLM(BaseLM):
...
@@ -47,27 +47,27 @@ class HFLM(BaseLM):
revision
=
revision
,
revision
=
revision
,
)
)
assert
isinstance
(
#
assert isinstance(
self
.
tokenizer
,
#
self.tokenizer,
(
#
(
transformers
.
GPT2Tokenizer
,
#
transformers.GPT2Tokenizer,
transformers
.
GPT2TokenizerFast
,
#
transformers.GPT2TokenizerFast,
transformers
.
T5Tokenizer
,
#
transformers.T5Tokenizer,
transformers
.
T5TokenizerFast
,
#
transformers.T5TokenizerFast,
),
#
),
),
"this tokenizer has not been checked for compatibility yet!"
#
), "this tokenizer has not been checked for compatibility yet!"
self
.
vocab_size
=
self
.
tokenizer
.
vocab_size
self
.
vocab_size
=
self
.
tokenizer
.
vocab_size
if
isinstance
(
#
if isinstance(
self
.
tokenizer
,
(
transformers
.
GPT2Tokenizer
,
transformers
.
GPT2TokenizerFast
)
#
self.tokenizer, (transformers.GPT2Tokenizer, transformers.GPT2TokenizerFast)
):
#
):
assert
self
.
tokenizer
.
encode
(
"hello
\n\n
hello"
)
==
[
#
assert self.tokenizer.encode("hello\n\nhello") == [
31373
,
#
31373,
198
,
#
198,
198
,
#
198,
31373
,
#
31373,
],
self
.
tokenizer
.
encode
(
"hello
\n\n
hello"
)
#
], self.tokenizer.encode("hello\n\nhello")
# multithreading and batching
# multithreading and batching
self
.
batch_size_per_gpu
=
batch_size
# todo: adaptive batch size
self
.
batch_size_per_gpu
=
batch_size
# todo: adaptive batch size
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment