gaoqiong / lm-evaluation-harness

Commit 829280e8
authored Jun 07, 2023 by lintangsutawika

adjustments on import origin

parent 9c3f7227

Showing 10 changed files with 48 additions and 17 deletions (+48 -17)
lm_eval/tasks/__init__.py                     +3  -5
lm_eval/tasks/arc.py                          +1  -1
lm_eval/tasks/gsm8k.py                        +1  -1
lm_eval/tasks/lambada.py                      +1  -1
lm_eval/tasks/pile.py                         +1  -1
lm_eval/tasks/super_glue/boolq/default.yaml  +11  -0
lm_eval/tasks/super_glue/cb/aggregate.py     +13  -0
lm_eval/tasks/super_glue/cb/default.yaml     +15  -0
lm_eval/tasks/wikitext.py                     +1  -1
lm_eval/tasks/wikitext/wikitext.yaml          +1  -7
lm_eval/tasks/__init__.py
@@ -5,11 +5,11 @@ from typing import List, Union
 from lm_eval import utils
 from lm_eval.logger import eval_logger
 from lm_eval.api.task import TaskConfig, Task, ConfigurableTask
-from lm_eval.api.register import (
+from lm_eval.api.registry import (
     register_task,
     register_group,
-    task_registry,
-    group_registry,
+    TASK_REGISTRY,
+    GROUP_REGISTRY,
 )
@@ -48,8 +48,6 @@ for root, subdirs, file_list in os.walk(task_dir):
                 " Config will not be added to registry"
             )
-TASK_REGISTRY = task_registry
-GROUP_REGISTRY = group_registry
 ALL_TASKS = sorted(list(TASK_REGISTRY.keys()) + list(GROUP_REGISTRY.keys()))
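Note: with lm_eval.api.registry now exporting TASK_REGISTRY and GROUP_REGISTRY directly, the local aliases previously built in lm_eval/tasks/__init__.py become redundant, which is what the second hunk removes. For readers unfamiliar with the pattern, a minimal sketch of what such a registry module might look like follows; the names are taken from the diff above, but the bodies are illustrative assumptions, not the harness's actual implementation.

    # Illustrative sketch of a task/group registry (assumed, not copied from lm_eval).
    TASK_REGISTRY = {}   # task name -> task class
    GROUP_REGISTRY = {}  # group name -> registered members


    def register_task(name):
        def decorate(cls):
            assert name not in TASK_REGISTRY, f"task '{name}' registered twice"
            TASK_REGISTRY[name] = cls
            return cls
        return decorate


    def register_group(name):
        def decorate(cls):
            GROUP_REGISTRY.setdefault(name, []).append(cls)
            return cls
        return decorate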
lm_eval/tasks/arc.py
@@ -16,7 +16,7 @@ from lm_eval import utils
 from lm_eval.prompts import get_prompt
 from lm_eval.api.task import MultipleChoiceTask
-from lm_eval.api.register import register_task, register_group
+from lm_eval.api.registry import register_task, register_group
 _CITATION = """
 @article{Clark2018ThinkYH,
lm_eval/tasks/gsm8k.py
@@ -24,7 +24,7 @@ from lm_eval.api.instance import Instance
 from lm_eval.prompts import get_prompt
-from lm_eval.api.register import register_task, register_group
+from lm_eval.api.registry import register_task, register_group
 _CITATION = """
 @misc{cobbe2021training,
lm_eval/tasks/lambada.py
@@ -16,7 +16,7 @@ from lm_eval.api.task import Task
 from lm_eval.api.instance import Instance
 from lm_eval.api.metrics import mean, perplexity
-from lm_eval.api.register import register_task, register_group
+from lm_eval.api.registry import register_task, register_group
 _CITATION = """
 @misc{
lm_eval/tasks/pile.py
@@ -12,7 +12,7 @@ Homepage: https://pile.eleuther.ai/
 from lm_eval.api.task import PerplexityTask
-from lm_eval.api.register import register_task, register_group
+from lm_eval.api.registry import register_task, register_group
 _CITATION = """
 @article{pile,
lm_eval/tasks/super_glue/boolq/default.yaml
new file (mode 0 → 100644)
+group:
+  - super-glue-lm-eval-v1
+task: "default"
+dataset_path: super_glue
+dataset_name: boolq
+output_type: multiple_choice
+training_split: train
+validation_split: validation
+doc_to_text: "{{passage}}\nQuestion: {{question}}\nAnswer:"
+doc_to_target: "{{label}}"  # this will be cast to an int.
+template_aliases: "{% set answer_choices = ['no', 'yes'] %}"
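The doc_to_text and doc_to_target fields are Jinja-style templates filled in from each BoolQ record, and template_aliases maps the integer label onto the answer strings. As a rough illustration only, using the standalone jinja2 package and a made-up record rather than the harness's own prompt machinery:

    import jinja2  # illustration; the harness applies these templates internally

    doc = {  # made-up BoolQ-style record
        "passage": "The Amazon is the largest rainforest on Earth.",
        "question": "is the amazon the largest rainforest",
        "label": 1,  # indexes into answer_choices = ['no', 'yes'] -> "yes"
    }

    doc_to_text = "{{passage}}\nQuestion: {{question}}\nAnswer:"
    print(jinja2.Template(doc_to_text).render(**doc))
    # The Amazon is the largest rainforest on Earth.
    # Question: is the amazon the largest rainforest
    # Answer: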
lm_eval/tasks/super_glue/cb/aggregate.py
new file (mode 0 → 100644)
+import sklearn
+import numpy as np
+
+
+def cb_multi_fi(items):
+    preds, golds = zip(*items)
+    preds = np.array(preds)
+    golds = np.array(golds)
+    f11 = sklearn.metrics.f1_score(y_true=golds == 0, y_pred=preds == 0)
+    f12 = sklearn.metrics.f1_score(y_true=golds == 1, y_pred=preds == 1)
+    f13 = sklearn.metrics.f1_score(y_true=golds == 2, y_pred=preds == 2)
+    avg_f1 = np.mean([f11, f12, f13])
+    return avg_f1
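cb_multi_fi is an aggregation over (prediction, gold) pairs: it computes a one-vs-rest F1 for each of the three CB labels and averages them. (The file imports sklearn but calls sklearn.metrics.f1_score; depending on the scikit-learn version an explicit import sklearn.metrics may be required.) A toy sanity check, assuming the file above is importable as aggregate:

    from aggregate import cb_multi_fi  # the file added above

    # made-up (prediction, gold) pairs over the three CB classes 0/1/2
    items = [(0, 0), (1, 1), (2, 2), (0, 1), (2, 2), (1, 0)]
    print(cb_multi_fi(items))  # mean of the three per-class F1 scores, ~0.667 here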
lm_eval/tasks/super_glue/cb/default.yaml
new file (mode 0 → 100644)
+group:
+  - super-glue-lm-eval-v1
+task: "default"
+dataset_path: super_glue
+dataset_name: cb
+output_type: multiple_choice
+training_split: train
+validation_split: validation
+doc_to_text: "{{premise}}\nQuestion: {{hypothesis}}. True, False, or Neither?\nAnswer:"
+doc_to_target: "{{label}}"  # this will be cast to an int.
+template_aliases: "{% set answer_choices = ['True', 'False', 'Neither'] %}"
+metric_list:
+  - metric: acc
+  - metric: f1
+    aggregation: !function "aggregate.cb_multi_fi"
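The f1 metric's aggregation points at aggregate.cb_multi_fi via a !function YAML tag, i.e. the function defined in the aggregate.py file added in this commit. How the harness resolves that tag is not part of this diff; a minimal PyYAML sketch of the general idea, an assumption rather than the actual loader, would be:

    import importlib
    import yaml


    def function_constructor(loader, node):
        # "aggregate.cb_multi_fi" -> import module "aggregate", fetch "cb_multi_fi"
        module_name, func_name = loader.construct_scalar(node).rsplit(".", 1)
        return getattr(importlib.import_module(module_name), func_name)


    yaml.SafeLoader.add_constructor("!function", function_constructor)
    config = yaml.safe_load('aggregation: !function "aggregate.cb_multi_fi"')
    # config["aggregation"] is now the callable cb_multi_fi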
lm_eval/tasks/wikitext.py
@@ -13,7 +13,7 @@ import re
 from lm_eval.api.task import PerplexityTask
-from lm_eval.api.register import register_task, register_group
+from lm_eval.api.registry import register_task, register_group
 _CITATION = """
 @misc{merity2016pointer,
lm_eval/tasks/wikitext/wikitext.yaml
 group:
   - wikitext_group
-task: wikitext_yaml
+task: default
 dataset_path: EleutherAI/wikitext_document_level
 dataset_name: wikitext-2-raw-v1
 output_type: loglikelihood_rolling
@@ -14,11 +14,5 @@ should_decontaminate: true
 doc_to_decontamination_query: "{{page}}"
 metric_list:
   - metric: word_perplexity
-    aggregation: weighted_perplexity
-    higher_is_better: false
   - metric: byte_perplexity
-    aggregation: weighted_perplexity
-    higher_is_better: false
   - metric: bits_per_byte
-    aggregation: bits_per_byte
-    higher_is_better: false
\ No newline at end of file
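The wikitext task keeps the word_perplexity, byte_perplexity, and bits_per_byte metrics but drops their explicit aggregation / higher_is_better entries, presumably relying on the harness's defaults for these metrics. Under the usual definitions, the dropped aggregations amount to something like the sketch below, where each item pairs a document's summed log-likelihood with its word or byte count; this is an illustration, not the harness's code:

    import math


    def weighted_perplexity(items):
        # items: (loglikelihood, number_of_words_or_bytes) per document
        loglikelihoods, weights = zip(*items)
        return math.exp(-sum(loglikelihoods) / sum(weights))


    def bits_per_byte(items):
        loglikelihoods, num_bytes = zip(*items)
        return -sum(loglikelihoods) / (sum(num_bytes) * math.log(2))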