Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
lm-evaluation-harness
Commits
d9b547b7
Commit
d9b547b7
authored
Jul 13, 2023
by
baberabb
Browse files
fix test_evaluator.py
parent
7d4e92fa
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
15 additions
and
17 deletions
+15
-17
lm_eval/models/dummy.py
lm_eval/models/dummy.py
+1
-1
tests/test_evaluator.py
tests/test_evaluator.py
+14
-16
No files found.
lm_eval/models/dummy.py
View file @
d9b547b7
...
@@ -6,7 +6,7 @@ from lm_eval.api.registry import register_model
...
@@ -6,7 +6,7 @@ from lm_eval.api.registry import register_model
@
register_model
(
"dummy"
)
@
register_model
(
"dummy"
)
class
DummyLM
(
LM
):
class
DummyLM
(
LM
):
def
__init__
(
self
):
def
__init__
(
self
):
pass
super
().
__init__
()
@
classmethod
@
classmethod
def
create_from_arg_string
(
cls
,
arg_string
,
additional_config
=
None
):
def
create_from_arg_string
(
cls
,
arg_string
,
additional_config
=
None
):
...
...
tests/test_evaluator.py
View file @
d9b547b7
...
@@ -14,10 +14,11 @@ import pytest
...
@@ -14,10 +14,11 @@ import pytest
# TODO: more fine grained unit tests rather than this big honking integration
# TODO: more fine grained unit tests rather than this big honking integration
# test once we break evaluator into smaller, more manageable pieces
# test once we break evaluator into smaller, more manageable pieces
# @pytest.mark.parametrize("taskname,task_class", tasks.TASK_REGISTRY.items())
@
pytest
.
mark
.
parametrize
(
"taskname,task_class"
,
tasks
.
TASK_REGISTRY
.
items
())
def
test_evaluator
():
def
test_evaluator
(
taskname
,
task_class
):
TASK
=
[
"arc_easy"
]
task_dict
=
tasks
.
get_task_dict
([
taskname
])
LIMIT
=
10
# task_dict = tasks.get_task_dict(task)
# TODO: re-add cachingLM
# TODO: re-add cachingLM
# os.system("rm test_cache.db")
# os.system("rm test_cache.db")
...
@@ -25,7 +26,7 @@ def test_evaluator(taskname, task_class):
...
@@ -25,7 +26,7 @@ def test_evaluator(taskname, task_class):
lm
=
registry
.
get_model
(
"dummy"
)()
lm
=
registry
.
get_model
(
"dummy"
)()
def
ll_fn
(
reqs
):
def
ll_fn
(
reqs
):
for
ctx
,
cont
in
reqs
:
for
ctx
,
cont
in
[
req
.
args
for
req
in
reqs
]
:
if
len
(
ctx
)
==
0
:
if
len
(
ctx
)
==
0
:
continue
continue
# space convention
# space convention
...
@@ -54,19 +55,16 @@ def test_evaluator(taskname, task_class):
...
@@ -54,19 +55,16 @@ def test_evaluator(taskname, task_class):
lm
.
loglikelihood
=
ll_fn
lm
.
loglikelihood
=
ll_fn
lm
.
loglikelihood_rolling
=
ll_perp_fn
lm
.
loglikelihood_rolling
=
ll_perp_fn
limit
=
10
e1
=
evaluator
.
simple_evaluate
(
e1
=
evaluator
.
evaluate
(
model
=
"dummy"
,
lm
=
lm
,
tasks
=
TASK
,
task_dict
=
task_dict
,
limit
=
LIMIT
,
num_fewshot
=
0
,
limit
=
limit
,
bootstrap_iters
=
10
,
bootstrap_iters
=
10
,
)
)
e2
=
evaluator
.
evaluate
(
e2
=
evaluator
.
simple_evaluate
(
lm
=
lm
,
model
=
"dummy"
,
task_dict
=
task_dict
,
tasks
=
TASK
,
num_fewshot
=
0
,
limit
=
LIMIT
,
limit
=
limit
,
bootstrap_iters
=
10
,
bootstrap_iters
=
10
,
)
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment