Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
lm-evaluation-harness
Commits
32624a1d
Commit
32624a1d
authored
Sep 07, 2020
by
Jason Phang
Browse files
remove seeding
parent
f88bb827
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
14 additions
and
8 deletions
+14
-8
lm_eval/base.py
lm_eval/base.py
+2
-2
lm_eval/tasks/common.py
lm_eval/tasks/common.py
+7
-1
lm_eval/tasks/glue.py
lm_eval/tasks/glue.py
+2
-2
lm_eval/tasks/superglue.py
lm_eval/tasks/superglue.py
+3
-3
No files found.
lm_eval/base.py
View file @
32624a1d
...
@@ -80,7 +80,6 @@ class Dataset(abc.ABC):
...
@@ -80,7 +80,6 @@ class Dataset(abc.ABC):
traindocs
=
list
(
self
.
training_docs
())
traindocs
=
list
(
self
.
training_docs
())
random
.
seed
(
123
)
random
.
seed
(
123
)
random
.
shuffle
(
traindocs
)
random
.
shuffle
(
traindocs
)
return
traindocs
[:
k
]
return
traindocs
[:
k
]
@
abc
.
abstractmethod
@
abc
.
abstractmethod
...
@@ -107,7 +106,8 @@ class Dataset(abc.ABC):
...
@@ -107,7 +106,8 @@ class Dataset(abc.ABC):
return
""
return
""
def
fewshot_context
(
self
,
doc
,
num_fewshot
,
provide_description
):
def
fewshot_context
(
self
,
doc
,
num_fewshot
,
provide_description
):
description
=
(
self
.
fewshot_description
()
+
"
\n\n
"
)
if
provide_description
else
""
raw_description
=
self
.
fewshot_description
()
description
=
(
raw_description
+
"
\n\n
"
)
if
provide_description
and
raw_description
else
""
labeled_examples
=
"
\n\n
"
.
join
(
labeled_examples
=
"
\n\n
"
.
join
(
map
(
self
.
doc_to_text
,
self
.
fewshot_examples
(
k
=
num_fewshot
))
map
(
self
.
doc_to_text
,
self
.
fewshot_examples
(
k
=
num_fewshot
))
)
+
"
\n\n
"
)
+
"
\n\n
"
...
...
lm_eval/tasks/common.py
View file @
32624a1d
import
abc
import
nlp
import
nlp
import
numpy
as
np
import
numpy
as
np
import
random
from
..base
import
Dataset
from
..base
import
Dataset
...
@@ -23,6 +23,12 @@ class NLP_TASK(Dataset):
...
@@ -23,6 +23,12 @@ class NLP_TASK(Dataset):
if
self
.
has_test_docs
():
if
self
.
has_test_docs
():
return
self
.
_load_nlp_dataset
()[
"test"
]
return
self
.
_load_nlp_dataset
()[
"test"
]
def
fewshot_examples
(
self
,
k
):
training_docs
=
self
.
training_docs
()
n
=
len
(
training_docs
)
indices
=
random
.
sample
(
range
(
n
),
k
)
return
[
training_docs
[
i
]
for
i
in
indices
]
def
simple_accuracy_metric
(
preds
,
golds
):
def
simple_accuracy_metric
(
preds
,
golds
):
acc
=
float
((
np
.
array
(
preds
)
==
np
.
array
(
golds
)).
mean
())
acc
=
float
((
np
.
array
(
preds
)
==
np
.
array
(
golds
)).
mean
())
...
...
lm_eval/tasks/glue.py
View file @
32624a1d
...
@@ -177,7 +177,7 @@ class RTE(NLP_TASK):
...
@@ -177,7 +177,7 @@ class RTE(NLP_TASK):
doc
[
"sentence2"
],
doc
[
"sentence2"
],
)
)
if
include_target
:
if
include_target
:
text
+=
" {}"
.
format
({
1
:
"True"
,
0
:
"False"
}[
doc
[
"label"
]])
text
+=
" {}"
.
format
({
0
:
"True"
,
1
:
"False"
}[
doc
[
"label"
]])
return
text
return
text
def
evaluate
(
self
,
docs
,
lm
,
provide_description
,
num_fewshot
):
def
evaluate
(
self
,
docs
,
lm
,
provide_description
,
num_fewshot
):
...
@@ -189,7 +189,7 @@ class RTE(NLP_TASK):
...
@@ -189,7 +189,7 @@ class RTE(NLP_TASK):
provide_description
=
provide_description
,
provide_description
=
provide_description
,
num_fewshot
=
num_fewshot
,
num_fewshot
=
num_fewshot
,
)
)
preds
.
append
(
lm
.
loglikelihood
(
ctx
,
'
Tru
e'
)
>
lm
.
loglikelihood
(
ctx
,
'
Fals
e'
))
preds
.
append
(
lm
.
loglikelihood
(
ctx
,
'
Fals
e'
)
>
lm
.
loglikelihood
(
ctx
,
'
Tru
e'
))
return
simple_accuracy_metric
(
preds
=
preds
,
golds
=
golds
)
return
simple_accuracy_metric
(
preds
=
preds
,
golds
=
golds
)
...
...
lm_eval/tasks/superglue.py
View file @
32624a1d
...
@@ -4,7 +4,7 @@ from . import TASK_REGISTRY
...
@@ -4,7 +4,7 @@ from . import TASK_REGISTRY
@
TASK_REGISTRY
.
register
(
"boolq"
)
@
TASK_REGISTRY
.
register
(
"boolq"
)
class
BoolQ
(
NLP_TASK
):
class
BoolQ
(
NLP_TASK
):
NLP_PATH
=
"superglue"
NLP_PATH
=
"super
_
glue"
NLP_NAME
=
"boolq"
NLP_NAME
=
"boolq"
def
has_training_docs
(
self
):
def
has_training_docs
(
self
):
...
@@ -21,10 +21,10 @@ class BoolQ(NLP_TASK):
...
@@ -21,10 +21,10 @@ class BoolQ(NLP_TASK):
def
doc_to_text
(
self
,
doc
,
include_target
=
True
):
def
doc_to_text
(
self
,
doc
,
include_target
=
True
):
return
f
"
{
doc
[
'passage'
]
}
\n
question:
{
doc
[
'question'
]
}
\n
answer: "
\
return
f
"
{
doc
[
'passage'
]
}
\n
question:
{
doc
[
'question'
]
}
\n
answer: "
\
+
(
yesno
(
doc
[
'
answer
'
])
if
include_target
else
""
)
+
(
yesno
(
doc
[
'
label
'
])
if
include_target
else
""
)
def
evaluate
(
self
,
docs
,
lm
,
provide_description
,
num_fewshot
):
def
evaluate
(
self
,
docs
,
lm
,
provide_description
,
num_fewshot
):
golds
=
[
doc
[
"
answer
"
]
for
doc
in
docs
]
golds
=
[
doc
[
"
label
"
]
for
doc
in
docs
]
preds
=
[]
preds
=
[]
for
doc
in
docs
:
for
doc
in
docs
:
ctx
=
self
.
fewshot_context
(
ctx
=
self
.
fewshot_context
(
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment