gaoqiong / lm-evaluation-harness

Commit 9d8e0532, authored May 01, 2023 by haileyschoelkopf
Parent: 2a9da9fb

    change id_ to idx in instance

Showing 5 changed files with 14 additions and 12 deletions:
    lm_eval/api/instance.py   +3 -3
    lm_eval/api/samplers.py   +6 -4
    lm_eval/api/task.py       +3 -3
    lm_eval/evaluator.py      +1 -1
    lm_eval/tasks/gsm8k.py    +1 -1
lm_eval/api/instance.py

 from dataclasses import dataclass, field
-from typing import Literal
+from typing import Literal, Tuple


 @dataclass
 class Instance:
     request_type: str = Literal["loglikelihood", "loglikelihood_rolling", "greedy_until"]
     doc: dict = None
     arguments: tuple = None
-    id_: int = None
-    metadata: tuple = None  # TODO: better typehints here
+    idx: int = None
+    metadata: tuple = Tuple[str, int, int]  # TODO: better typehints here
     resps: list = field(default_factory=list)
     filtered_resps: dict = field(default_factory=dict)
 ...
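As a usage sketch (not part of the commit; the document and arguments below are hypothetical), a request is now tagged with idx in place of the old id_ field:

    from lm_eval.api.instance import Instance

    # hypothetical multiple-choice request: second answer option for one doc
    inst = Instance(
        request_type="loglikelihood",
        doc={"query": "2 + 2 =", "choices": ["3", "4"]},
        arguments=("2 + 2 =", " 4"),
        idx=1,  # position of this request within its document (formerly `id_`)
    )
    print(inst.idx)  # -> 1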
lm_eval/api/samplers.py

 class Sampler:  # TODO: make this abstract class?
     def __init__(self, docs, task, fewshot_indices=None, rnd=None):
 ...
@@ -17,14 +16,17 @@ class Sampler:  # TODO: make this abstract class?
         if fewshot_indices:  # subset few-shot docs from
             self.docs = self.docs.select(fewshot_indices)

     def get_context(self, doc, num_fewshot):
-        # draw an extra fewshot sample if
+        # draw an extra fewshot sample if using same split as evaluating on
         n_samples = num_fewshot + 1 if self.config.fewshot_split == self.config.test_split else num_fewshot

+        # draw `n_samples` docs from fewshot_docs
         fewshotex = self.sample(n_samples)

         # get rid of the doc that's the one we're evaluating, if it's in the fewshot
+        # TODO: should we just stop people from using fewshot from same split as evaluating?
         selected_docs = [x for x in fewshotex if x != doc][:num_fewshot]

         labeled_examples = (
 ...
@@ -53,7 +55,7 @@ class BalancedSampler(Sampler):
     def sample(self, n):
         """
         TODO: this should return approximately class-balanced samples from our fewshot examples.
-        TODO: what order should they be in?
+        TODO: what order should they be in? maybe random?
         """
         pass
 ...
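A self-contained sketch of the selection logic in get_context above, with plain lists standing in for the dataset and the task config (the real Sampler draws via self.sample and compares split names from its config):

    def select_fewshot(doc, fewshot_pool, num_fewshot, same_split):
        # over-draw by one when few-shot examples come from the split being
        # evaluated, so the current doc can be dropped without a shortfall
        n_samples = num_fewshot + 1 if same_split else num_fewshot
        drawn = fewshot_pool[:n_samples]  # stand-in for self.sample(n_samples)
        return [x for x in drawn if x != doc][:num_fewshot]

    pool = ["doc_a", "doc_b", "doc_c", "doc_d"]
    print(select_fewshot("doc_b", pool, 2, same_split=True))   # ['doc_a', 'doc_c']
    print(select_fewshot("doc_z", pool, 2, same_split=False))  # ['doc_a', 'doc_b']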
lm_eval/api/task.py

 ...
@@ -469,7 +469,7 @@ class ConfigurableTask(Task):

     def construct_requests(self, doc, ctx, **kwargs):
         if self.OUTPUT_TYPE == "greedy_until":
-            return Instance(request_type=self.OUTPUT_TYPE, doc=doc, arguments=(ctx, "\n\n"), id_=0, **kwargs)
+            return Instance(request_type=self.OUTPUT_TYPE, doc=doc, arguments=(ctx, "\n\n"), idx=0, **kwargs)

     def process_results(self, doc, results):
 ...
@@ -511,7 +511,7 @@ class MultipleChoiceTask(Task):
                 request_type="loglikelihood",
                 doc=doc,
                 arguments=(ctx, " {}".format(choice)),
-                id_=i,
+                idx=i,
                 **kwargs,
             )
             for i, choice in enumerate(doc["choices"])]
 ...
@@ -589,7 +589,7 @@ class PerplexityTask(Task, abc.ABC):

     def construct_requests(self, doc, ctx, **kwargs):
         assert not ctx
-        return Instance(request_type=self.OUTPUT_TYPE, doc=doc, arguments=(self.doc_to_target(doc),), id_=0, **kwargs)
+        return Instance(request_type=self.OUTPUT_TYPE, doc=doc, arguments=(self.doc_to_target(doc),), idx=0, **kwargs)
         # req = rf.loglikelihood_rolling(self.doc_to_target(doc))
         # return req
 ...
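Why idx=i matters in the multiple-choice case: one Instance is issued per answer choice, and i records which choice each loglikelihood score belongs to. A hedged sketch with a hypothetical doc, using Instance as defined in lm_eval/api/instance.py above:

    from lm_eval.api.instance import Instance

    doc = {"query": "The sky is", "choices": ["green", "blue", "red"]}
    ctx = "The sky is"

    requests = [
        Instance(
            request_type="loglikelihood",
            doc=doc,
            arguments=(ctx, " {}".format(choice)),
            idx=i,  # choice position, recovered later by the evaluator's sort
        )
        for i, choice in enumerate(doc["choices"])
    ]
    print([(r.idx, r.arguments[1]) for r in requests])
    # -> [(0, ' green'), (1, ' blue'), (2, ' red')]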
lm_eval/evaluator.py

 ...
@@ -181,7 +181,7 @@ def evaluate(
         for doc_id, doc in enumerate(itertools.islice(task.test_docs(), 0, limit) if task.has_test_docs() else task.validation_docs()):
             # subset instances to only this document id ; sort by idx
             requests = list(filter(lambda x: x.doc_id == doc_id, task.instances))
-            requests.sort(key=lambda x: x.id_)
+            requests.sort(key=lambda x: x.idx)
             metrics = task.process_results(doc, [req.filtered_resps[key] for req in requests])
             for metric, value in metrics.items():
                 vals[(task_name, key, metric)].append(value)
 ...
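A self-contained sketch (hypothetical responses; SimpleNamespace standing in for Instance) of what the sort buys: responses can come back from the model in arbitrary order, and sorting by idx realigns them with the per-document request order before process_results runs:

    from types import SimpleNamespace

    instances = [
        SimpleNamespace(doc_id=0, idx=1, filtered_resps={"none": -2.3}),
        SimpleNamespace(doc_id=0, idx=0, filtered_resps={"none": -0.7}),
        SimpleNamespace(doc_id=1, idx=0, filtered_resps={"none": -1.1}),
    ]

    # mirror the evaluate() loop: subset to one document, then sort by idx
    requests = list(filter(lambda x: x.doc_id == 0, instances))
    requests.sort(key=lambda x: x.idx)  # was key=lambda x: x.id_ before this commit
    print([req.filtered_resps["none"] for req in requests])  # -> [-0.7, -2.3]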
lm_eval/tasks/gsm8k.py

 ...
@@ -88,7 +88,7 @@ class GradeSchoolMath8K(Task):
         """
         # NOTE: The paper implements "verifiers" that assign a score to multiple
         # solutions and output the highest ranked solution.
-        return Instance(request_type=self.OUTPUT_TYPE, doc=doc, arguments=(ctx, ["\n"]), id_=0, **kwargs)
+        return Instance(request_type=self.OUTPUT_TYPE, doc=doc, arguments=(ctx, ["\n"]), idx=0, **kwargs)
         # completion = rf.greedy_until(ctx, ["\n"])
         # return completion
 ...
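Taken together, the rename has to land atomically across all five files: every construct_requests site (task.py, gsm8k.py) now passes idx= to Instance, and the single consumer in evaluator.py sorts on the same attribute, so a stale id_= keyword would fail at construction with a TypeError for an unexpected keyword argument, and a stale x.id_ access would fail with an AttributeError.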