gaoqiong / lm-evaluation-harness

Commit 86e78589
authored Sep 04, 2023 by lintangsutawika

modified changes to fix loglikelihood prediction for seq2seq

parent 0d195e90
Showing 1 changed file with 12 additions and 7 deletions.
lm_eval/models/huggingface.py  +12 -7
@@ -409,8 +409,9 @@ class HFLM(LM):
        utils.clear_torch_cache()

        return batch_size

-    def tok_encode(self, string: str, left_truncate_len=None):
+    def tok_encode(self, string: str, left_truncate_len=None, add_special_tokens=None):
        """ """
        if add_special_tokens is None:
            if self.AUTO_MODEL_CLASS == transformers.AutoModelForCausalLM:
                add_special_tokens = False
            elif self.AUTO_MODEL_CLASS == transformers.AutoModelForSeq2SeqLM:
...
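Note: the new add_special_tokens argument is presumably passed through to the Hugging Face tokenizer flag of the same name. A minimal sketch of what that flag changes, using t5-small purely as an illustrative checkpoint (not something this commit prescribes):

# Illustrative only: how add_special_tokens changes a seq2seq tokenizer's output.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("t5-small")  # example checkpoint only

with_special = tok("The dog walked", add_special_tokens=True)["input_ids"]
without_special = tok("The dog walked", add_special_tokens=False)["input_ids"]

# T5 appends the </s> (EOS) token when add_special_tokens=True,
# so the two encodings differ by a single trailing id.
print(with_special)
print(without_special)
assert with_special[:-1] == without_special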
...
@@ -529,8 +530,12 @@ class HFLM(LM):
        if n_spaces > 0:
            continuation = context[-n_spaces:] + continuation
            context = context[:-n_spaces]

-        whole_enc = self.tok_encode(context + continuation)
-        context_enc = self.tok_encode(context)
+        whole_enc = self.tok_encode(context + continuation, add_special_tokens=False)
+        context_enc = self.tok_encode(context, add_special_tokens=False)
+
+        # whole_enc = self.tok_encode(context + continuation)
+        # context_enc = self.tok_encode(context, add_special_tokens=False)

        context_enc_len = len(context_enc)
        continuation_enc = whole_enc[context_enc_len:]

        return context_enc, continuation_enc
...
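Why add_special_tokens=False matters in this hunk: the continuation ids are recovered purely by slicing the concatenated encoding at len(context_enc), so the context encoding has to line up token-for-token with the start of the whole encoding. With special tokens enabled, a seq2seq tokenizer such as T5 would append </s> to context_enc and shift the split point. A rough, self-contained rendition of the same splitting logic (hypothetical encode_pair helper and example checkpoint, not the harness's exact code):

# Hypothetical, standalone sketch of the context/continuation split shown above.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("t5-small")  # example checkpoint only

def encode_pair(context: str, continuation: str):
    # Encode the concatenation once, then recover the continuation ids by
    # slicing off the first len(context_enc) tokens.
    whole_enc = tok(context + continuation, add_special_tokens=False)["input_ids"]
    context_enc = tok(context, add_special_tokens=False)["input_ids"]
    context_enc_len = len(context_enc)
    continuation_enc = whole_enc[context_enc_len:]
    return context_enc, continuation_enc

ctx_ids, cont_ids = encode_pair("Question: what walked?\nAnswer:", " the dog")
print(ctx_ids)
print(cont_ids)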