gaoqiong / lm-evaluation-harness / Commits

Commit 0f283a9c
Authored Feb 24, 2022 by Leo Gao

further interface changes

Parent: b2460099
Showing 2 changed files with 9 additions and 11 deletions
lm_eval/decontamination.py  +4 -2
lm_eval/evaluator.py        +5 -9
lm_eval/decontamination.py
@@ -14,7 +14,7 @@ try:
     import janitor_util
     JANITOR_CPP = True
 except Exception as e:
-    print("WARNING: C++ module could not be loaded. Janitor running in python mode")
+    # print("WARNING: C++ module could not be loaded. Janitor running in python mode")
     JANITOR_CPP = False

 # Was used for testing the evaluator decoupled from the full logic below
@@ -41,9 +41,11 @@ def get_train_overlap_stub(docs, ngrams_path, ngrams_n_size):
 # We cache the task+set lookups as well as the overlaps.
 #
 # Currently calculating some per file ngram stats for interest, might remove before merging into main
-def get_train_overlap(docs_by_task_set, ngrams_path, ngrams_n_size, limit):
+def get_train_overlap(docs_by_task_set, ngrams_path, limit):
+    # return get_train_overlap_stub(docs, ngrams_path, ngrams_n_size)
+    # TODO: infer ngrams_n_size from ngrams_path
     janitor = Janitor()

     # Build lookup for each dataset first in case we use different task combinations later
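The new get_train_overlap signature drops the explicit ngrams_n_size argument, and the TODO above says the n-gram size should instead be inferred from ngrams_path. A minimal sketch of one way such inference could work, assuming the n-gram files encode their order in the file extension (e.g. "pile.13grams"); the helper name and file layout are assumptions for illustration, not part of this commit:

import os
import re

def infer_ngrams_n_size(ngrams_path):
    # Hypothetical helper: look for a file whose extension encodes the
    # n-gram order, e.g. "pile.13grams" -> 13. Not part of this commit.
    for filename in os.listdir(ngrams_path):
        match = re.search(r"\.(\d+)grams$", filename)
        if match:
            return int(match.group(1))
    raise ValueError("Could not infer n-gram size from " + ngrams_path)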
lm_eval/evaluator.py
@@ -13,8 +13,7 @@ from lm_eval.utils import positional_deprecated
 def simple_evaluate(model, model_args=None, tasks=[],
                     num_fewshot=0, batch_size=None, device=None, no_cache=False,
                     limit=None, bootstrap_iters=100000,
-                    description_dict=None, decontaminate=False,
-                    decontaminate_ngrams_path=None, decontaminate_ngrams_n_size=None):
+                    description_dict=None, decontamination_ngrams_path=None):
     """Instantiate and evaluate a model on a list of tasks.

     :param model: Union[str, LM]
@@ -68,9 +67,7 @@ def simple_evaluate(model, model_args=None, tasks=[],
         num_fewshot=num_fewshot,
         limit=limit,
         description_dict=description_dict,
-        decontaminate=decontaminate,
-        decontaminate_ngrams_path=decontaminate_ngrams_path,
-        decontaminate_ngrams_n_size=decontaminate_ngrams_n_size
+        decontamination_ngrams_path=decontamination_ngrams_path,
     )

     # add info about the model and few shot config
@@ -92,7 +89,7 @@ decontaminate_suffix = "_decontaminate"

 @positional_deprecated
 def evaluate(lm, task_dict, provide_description=None, num_fewshot=0, limit=None,
-             bootstrap_iters=100000, description_dict=None, decontaminate=False, decontaminate_ngrams_path=None, decontaminate_ngrams_n_size=None):
+             bootstrap_iters=100000, description_dict=None, decontamination_ngrams_path=None):
     """Instantiate and evaluate a model on a list of tasks.

     :param lm: obj
@@ -120,8 +117,7 @@ def evaluate(lm, task_dict, provide_description=None, num_fewshot=0, limit=None,
         # nudge people to not specify it at all
         print("WARNING: provide_description is deprecated and will be removed in a future version in favor of description_dict")

-    if decontaminate:
-        assert decontaminate_ngrams_path and decontaminate_ngrams_n_size
+    decontaminate = decontamination_ngrams_path is not None

     task_dict_items = [
         (name, task)
@@ -193,7 +189,7 @@ def evaluate(lm, task_dict, provide_description=None, num_fewshot=0, limit=None,
     # Compare all tasks/sets at once to ensure a single training set scan
     if decontaminate:
         print("Finding train/test overlap, please wait...")
-        overlaps = lm_eval.decontamination.get_train_overlap(docs_for_decontamination, decontaminate_ngrams_path, decontaminate_ngrams_n_size, limit)
+        overlaps = lm_eval.decontamination.get_train_overlap(docs_for_decontamination, decontamination_ngrams_path, limit)

     # all responses for each (task, doc)
     process_res_queue = collections.defaultdict(list)
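Taken together, these changes collapse the three decontamination parameters (decontaminate, decontaminate_ngrams_path, decontaminate_ngrams_n_size) into a single decontamination_ngrams_path: supplying a path enables the train/test overlap scan, and leaving it as None disables it. A rough usage sketch against the new simple_evaluate signature; the model name, task list, and path below are placeholders, not taken from this commit:

from lm_eval import evaluator

results = evaluator.simple_evaluate(
    model="gpt2",                # placeholder model name
    tasks=["lambada"],           # placeholder task list
    num_fewshot=0,
    # Passing a path turns decontamination on; omitting it (None) skips the
    # train/test overlap scan entirely.
    decontamination_ngrams_path="data/pile_ngrams",  # placeholder path
)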