Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
lm-evaluation-harness
Commits
c971fa82
Commit
c971fa82
authored
Mar 26, 2021
by
Leo Gao
Browse files
Fix stuff and make tests pass
parent
0966e7b6
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
10 additions
and
6 deletions
+10
-6
lm_eval/models/gpt2.py
lm_eval/models/gpt2.py
+7
-3
lm_eval/tasks/glue.py
lm_eval/tasks/glue.py
+2
-2
tests/test_evaluator.py
tests/test_evaluator.py
+1
-1
No files found.
lm_eval/models/gpt2.py
View file @
c971fa82
...
...
@@ -60,12 +60,16 @@ class GPT2LM(LM):
greedy_tokens
=
logits
.
argmax
(
dim
=-
1
)
max_equal
=
(
greedy_tokens
==
cont_toks
).
all
()
logits
=
torch
.
gather
(
logits
,
2
,
cont_toks
.
unsqueeze
(
-
1
)).
squeeze
(
-
1
)
# [batch, seq]
last_token_slice
=
logits
[:,
-
1
,
:].
squeeze
(
0
).
tolist
()
logits
=
torch
.
gather
(
logits
,
2
,
cont_toks
.
unsqueeze
(
-
1
)).
squeeze
(
-
1
)
# [batch, seq]
res
.
append
((
float
(
logits
.
sum
())
,
bool
(
max_equal
)))
res
.
append
((
float
(
logits
[:,
:
-
1
].
sum
()
if
logits
.
shape
[
-
1
]
>
1
else
0
),
last_token_slice
,
bool
(
max_equal
)))
return
reord
.
get_original
(
res
)
# optimization: if two requests have everything the same except the last token, use
# last token distribution to save compute
lasttoks
=
[
self
.
tokenizer
.
encode
(
x
[
1
])[
-
1
]
for
x
in
requests
]
return
[(
l
+
lts
[
lasttok
],
m
)
for
(
l
,
lts
,
m
),
lasttok
in
zip
(
reord
.
get_original
(
res
),
lasttoks
)]
def
greedy_until
(
self
,
requests
):
# TODO: implement fully general `until` that handles untils that are
...
...
lm_eval/tasks/glue.py
View file @
c971fa82
...
...
@@ -334,7 +334,7 @@ class MRPC(HFTask):
return
True
def
has_test_docs
(
self
):
return
True
return
False
def
fewshot_description
(
self
):
return
"Indicate if both sentences mean the same thing."
...
...
@@ -386,7 +386,7 @@ class QQP(HFTask):
return
True
def
has_test_docs
(
self
):
return
True
return
False
def
fewshot_description
(
self
):
return
"Indicate if both questions ask the same thing."
...
...
tests/test_evaluator.py
View file @
c971fa82
...
...
@@ -29,4 +29,4 @@ def test_evaluator(taskname, Task):
lm
.
loglikelihood
=
ll_fn
evaluator
.
evaluate
(
lm
,
task_dict
,
False
,
0
,
10
)
\ No newline at end of file
evaluator
.
evaluate
(
lm
,
task_dict
,
False
,
0
,
3
)
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment