gaoqiong / lm-evaluation-harness
"test/git@developer.sourcefind.cn:gaoqiong/migraphx.git" did not exist on "123f7a01d8f63d18d0a3a5433b6eed3d0749f07e"
Commit c93093b6 authored Apr 25, 2022 by cjlovering

Removed the default option for an acc task

parent 4ae2ab37
Showing 1 changed file with 27 additions and 61 deletions (+27 -61):
lm_eval/tasks/superglue.py
@@ -54,16 +54,6 @@ class BoolQ(PromptSourceTask):
     def validation_docs(self):
         return self.dataset["validation"]
 
-    def higher_is_better(self):
-        return {"acc": True}
-
-    def aggregation(self):
-        return {"acc": mean}
-
 
 class CommitmentBank(PromptSourceTask):
     VERSION = 1
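For context, the deleted higher_is_better/aggregation pair is the harness's per-task metric declaration: higher_is_better marks which direction of each metric counts as an improvement, and aggregation maps each metric name to a reducer over per-document values. A minimal sketch of the pattern being removed, assuming `mean` is the plain arithmetic-average reducer imported from lm_eval.metrics:

def mean(arr):
    # Assumed to match the simple arithmetic-average reducer in lm_eval.metrics.
    return sum(arr) / len(arr)


class AccuracyTask:
    # Hypothetical stand-in for a task such as BoolQ; not code from this commit.
    def higher_is_better(self):
        # Higher accuracy is an improvement.
        return {"acc": True}

    def aggregation(self):
        # Reduce the per-document "acc" values (0.0 or 1.0) to a single score.
        return {"acc": mean}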
@@ -90,18 +80,12 @@ class CommitmentBank(PromptSourceTask):
     def process_results(self, doc, results):
         gold = doc["label"]
         pred = np.argmax(results)
-        acc = 1. if pred == gold else 0.
+        acc = 1.0 if pred == gold else 0.0
         return {"acc": acc, "f1": (pred, gold)}
 
     def higher_is_better(self):
         return {"acc": True, "f1": True}
 
     @classmethod
     def cb_multi_fi(cls, items):
@@ -113,7 +97,7 @@ class CommitmentBank(PromptSourceTask):
         f13 = sklearn.metrics.f1_score(y_true=golds == 2, y_pred=preds == 2)
         avg_f1 = mean([f11, f12, f13])
         return avg_f1
 
     def aggregation(self):
         return {
             "acc": mean,
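The f11/f12/f13 lines compute one-vs-rest F1 for each of CommitmentBank's three labels and average them, i.e. a macro-F1; the hunk above shows the label-2 case. A self-contained sketch of that aggregation, with hypothetical gold/pred arrays:

import numpy as np
import sklearn.metrics


def cb_macro_f1(golds, preds):
    # One-vs-rest F1 for each of the three CB labels, then the unweighted
    # average (macro-F1), mirroring f11/f12/f13 in the hunk above.
    golds = np.asarray(golds)
    preds = np.asarray(preds)
    per_class = [
        sklearn.metrics.f1_score(y_true=golds == k, y_pred=preds == k)
        for k in (0, 1, 2)
    ]
    return sum(per_class) / len(per_class)


# Hypothetical labels and argmax predictions, as collected from (pred, gold) pairs:
print(cb_macro_f1(golds=[0, 1, 2, 2], preds=[0, 1, 2, 1]))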
@@ -146,21 +130,15 @@ class Copa(PromptSourceTask):
     def process_results(self, doc, results):
         gold = doc["label"]
         pred = np.argmax(results)
-        acc = 1. if pred == gold else 0.
+        acc = 1.0 if pred == gold else 0.0
         return {"acc": acc}
 
     def higher_is_better(self):
         return {"acc": True}
 
     def aggregation(self):
         return {"acc": mean}
 
     @staticmethod
     def convert_choice(choice):
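In both CommitmentBank and Copa above, `results` holds one loglikelihood per answer choice, so np.argmax selects the choice the model scores highest, and per-document accuracy is 1.0 or 0.0 (the literal style this commit normalizes). A tiny illustration with made-up numbers:

import numpy as np

# Hypothetical per-choice loglikelihoods for a two-choice COPA item.
results = [-4.2, -3.1]
gold = 1  # index of the correct choice

pred = np.argmax(results)           # choice the model scores highest
acc = 1.0 if pred == gold else 0.0  # later averaged across documents by `mean`
print(acc)  # prints 1.0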
@@ -192,19 +170,13 @@ class MultiRC(PromptSourceTask):
     def process_results(self, doc, results):
         ll_true_choice, ll_false_choice = results
         pred = ll_true_choice > ll_false_choice
         return {"acc": (pred, doc)}
 
     def higher_is_better(self):
         return {"acc": True}
 
     def aggregation(self):
         return {"acc": acc_all}
 
 
 class ReCoRD(PromptSourceTask):
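Unlike `mean`, the `acc_all` aggregator used by MultiRC above scores at the question level: process_results emits (pred, doc) pairs, and a question counts as correct only if every one of its candidate answers was classified correctly. A minimal sketch, where the grouping key follows the dataset's idx fields (the exact field names are an assumption, not taken from this diff):

import collections


def acc_all(items):
    # `items` are the (pred, doc) pairs emitted by MultiRC.process_results.
    question_map = collections.defaultdict(list)
    for pred, doc in items:
        # Assumed grouping key; field names are hypothetical.
        key = (doc["idx"]["paragraph"], doc["idx"]["question"])
        question_map[key].append(int(pred) == doc["label"])
    # A question is correct only if all of its candidate answers are correct.
    per_question = [all(flags) for flags in question_map.values()]
    return sum(per_question) / len(per_question)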
@@ -255,8 +227,12 @@ class ReCoRD(PromptSourceTask):
         prediction = doc["entities"][max_idx]
         gold_label_set = doc["answers"]
-        f1 = metric_max_over_ground_truths(squad_metrics.compute_f1, prediction, gold_label_set)
-        em = metric_max_over_ground_truths(squad_metrics.compute_exact, prediction, gold_label_set)
+        f1 = metric_max_over_ground_truths(
+            squad_metrics.compute_f1, prediction, gold_label_set
+        )
+        em = metric_max_over_ground_truths(
+            squad_metrics.compute_exact, prediction, gold_label_set
+        )
 
         return {
             "f1": f1,
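metric_max_over_ground_truths follows the standard SQuAD evaluation convention: score the prediction against every acceptable answer and keep the best score. A minimal sketch, assuming the metric functions take (prediction, ground_truth) string pairs the way squad_metrics.compute_f1 and compute_exact do:

def metric_max_over_ground_truths(metric_fn, prediction, ground_truths):
    # Best score over all acceptable answers, as in the SQuAD eval script.
    return max(metric_fn(prediction, gt) for gt in ground_truths)


def exact_match(pred, gold):
    # Trivial stand-in metric for the demo below (hypothetical, not from the diff).
    return float(pred.strip().lower() == gold.strip().lower())


print(metric_max_over_ground_truths(exact_match, "Obama", ["Barack Obama", "Obama"]))  # 1.0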
@@ -299,14 +275,10 @@ class WordsInContext(PromptSourceTask):
         return self.dataset["validation"]
 
     def higher_is_better(self):
         return {"acc": True}
 
     def aggregation(self):
         return {"acc": mean}
 
 
 class SGWinogradSchemaChallenge(PromptSourceTask):
@@ -330,9 +302,7 @@ class SGWinogradSchemaChallenge(PromptSourceTask):
         if self._training_docs is None:
             # GPT-3 Paper's format only uses positive examples for fewshot "training"
             self._training_docs = [
                 doc for doc in self.dataset["train"] if doc["label"]
             ]
         return self._training_docs
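The filter above keeps only positive (label == 1) WSC examples for fewshot prompts, matching the GPT-3 paper's format referenced in the comment. A toy illustration with a hypothetical miniature train split:

# Hypothetical miniature "train" split, standing in for self.dataset["train"].
dataset = {
    "train": [
        {"text": "example A", "label": 1},
        {"text": "example B", "label": 0},
        {"text": "example C", "label": 1},
    ]
}

# Keep only positive examples for fewshot "training", as in the hunk above.
training_docs = [doc for doc in dataset["train"] if doc["label"]]
assert [d["text"] for d in training_docs] == ["example A", "example C"]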
@@ -340,11 +310,7 @@ class SGWinogradSchemaChallenge(PromptSourceTask):
         return self.dataset["validation"]
 
     def higher_is_better(self):
         return {"acc": True}
 
     def aggregation(self):
         return {"acc": mean}