Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
lm-evaluation-harness
Commits
04e72904
"tools/checkpoint/loader_legacy.py" did not exist on "3c92fa93b54befffb1f908ba7cd745a1527468a4"
Commit
04e72904
authored
Jan 02, 2021
by
Leo Gao
Browse files
Fix up bugs
parent
fb176b57
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
13 additions
and
11 deletions
+13
-11
main.py
main.py
+13
-11
No files found.
main.py
View file @
04e72904
...
...
@@ -17,7 +17,7 @@ def parse_args():
parser
.
add_argument
(
'--num_fewshot'
,
type
=
int
,
default
=
1
)
parser
.
add_argument
(
'--seed'
,
type
=
int
,
default
=
1234
)
parser
.
add_argument
(
'--output_path'
,
default
=
None
)
parser
.
add_argument
(
'--limit'
,
default
=
None
)
parser
.
add_argument
(
'--limit'
,
type
=
int
,
default
=
None
)
return
parser
.
parse_args
()
def
main
():
...
...
@@ -35,7 +35,7 @@ def main():
# TODO: fall back to test docs
task_dict_items
=
[(
name
,
task
)
for
name
,
task
in
task_dict
.
items
()
if
task
.
has_validation_docs
()]
results
=
{}
results
=
collections
.
defaultdict
(
dict
)
requests
=
collections
.
defaultdict
(
list
)
requests_origin
=
collections
.
defaultdict
(
list
)
...
...
@@ -74,27 +74,29 @@ def main():
vals
=
collections
.
defaultdict
(
list
)
for
(
task_name
,
doc_id
),
arg
s
in
process_res_queue
.
items
():
arg
s
.
sort
(
lambda
x
:
x
[
0
])
arg
s
=
[
x
[
1
]
for
x
in
arg
s
]
for
(
task_name
,
doc_id
),
request
s
in
process_res_queue
.
items
():
request
s
.
sort
(
key
=
lambda
x
:
x
[
0
])
request
s
=
[
x
[
1
]
for
x
in
request
s
]
task
=
task_dict
[
task_name
]
doc
=
docs
[(
task_name
,
doc_id
)]
metrics
=
task
.
process_results
(
doc
,
arg
s
)
metrics
=
task
.
process_results
(
doc
,
request
s
)
for
metric
in
metrics
:
results
[
(
task_name
,
metric
[
'submetric'
]
)
]
=
{
results
[
task_name
][
metric
[
'submetric'
]]
=
{
"higher_is_better"
:
metric
[
"higher_is_better"
],
"aggregation"
:
metric
[
"aggregation"
]
}
vals
[(
task_name
,
metric
[
'submetric'
])].
append
(
metric
[
'value'
])
for
k
in
results
.
keys
():
results
[
k
][
'value'
]
=
results
[
k
][
'aggregation'
](
vals
[
k
])
for
task_name
,
submetrics
in
results
.
items
():
for
k
in
submetrics
.
keys
():
submetrics
[
k
][
'value'
]
=
submetrics
[
k
][
'aggregation'
](
vals
[(
task_name
,
k
)])
# can't serialize a function
del
result
s
[
k
][
'aggregation'
]
# can't serialize a function
del
submetric
s
[
k
][
'aggregation'
]
print
(
results
)
dumped
=
json
.
dumps
(
results
,
indent
=
2
)
print
(
dumped
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment