Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
lm-evaluation-harness
Commits
c4c20ff5
Commit
c4c20ff5
authored
May 19, 2023
by
lintangsutawika
Browse files
pre-commit stuff
parent
e56b950a
Changes
32
Show whitespace changes
Inline
Side-by-side
Showing
12 changed files
with
56 additions
and
39 deletions
+56
-39
lm_eval/tasks/arc/README.md
lm_eval/tasks/arc/README.md
+1
-1
lm_eval/tasks/arc/arc_challenge.yaml
lm_eval/tasks/arc/arc_challenge.yaml
+2
-2
lm_eval/tasks/arc/arc_easy.yaml
lm_eval/tasks/arc/arc_easy.yaml
+2
-2
lm_eval/tasks/gsm8k.py
lm_eval/tasks/gsm8k.py
+9
-2
lm_eval/tasks/lambada.py
lm_eval/tasks/lambada.py
+9
-2
lm_eval/tasks/pile.py
lm_eval/tasks/pile.py
+2
-2
lm_eval/tasks/pile_enron.yaml
lm_eval/tasks/pile_enron.yaml
+1
-1
lm_eval/tasks/super_glue/wsc/preprocess_wsc.py
lm_eval/tasks/super_glue/wsc/preprocess_wsc.py
+10
-10
lm_eval/tasks/wikitext.py
lm_eval/tasks/wikitext.py
+1
-0
lm_eval/utils.py
lm_eval/utils.py
+13
-13
main.py
main.py
+5
-3
setup.py
setup.py
+1
-1
No files found.
lm_eval/tasks/arc/README.md
View file @
c4c20ff5
lm_eval/tasks/arc/arc_challenge.yaml
View file @
c4c20ff5
lm_eval/tasks/arc/arc_easy.yaml
View file @
c4c20ff5
lm_eval/tasks/gsm8k.py
View file @
c4c20ff5
...
@@ -17,9 +17,10 @@ model's sample/generation function.
...
@@ -17,9 +17,10 @@ model's sample/generation function.
Homepage: https://github.com/openai/grade-school-math
Homepage: https://github.com/openai/grade-school-math
"""
"""
import
re
import
re
from
lm_eval
import
utils
from
lm_eval.api.task
import
Task
from
lm_eval.api.metrics
import
mean
from
lm_eval.api.metrics
import
mean
from
lm_eval.api.instance
import
Instance
from
lm_eval.api.instance
import
Instance
from
lm_eval.api.task
import
Task
from
lm_eval.prompts
import
get_prompt
from
lm_eval.prompts
import
get_prompt
...
@@ -88,7 +89,13 @@ class GradeSchoolMath8K(Task):
...
@@ -88,7 +89,13 @@ class GradeSchoolMath8K(Task):
"""
"""
# NOTE: The paper implements "verifiers" that assign a score to multiple
# NOTE: The paper implements "verifiers" that assign a score to multiple
# solutions and output the highest ranked solution.
# solutions and output the highest ranked solution.
return
Instance
(
request_type
=
self
.
OUTPUT_TYPE
,
doc
=
doc
,
arguments
=
(
ctx
,
[
"
\n
"
]),
idx
=
0
,
**
kwargs
)
return
Instance
(
request_type
=
self
.
OUTPUT_TYPE
,
doc
=
doc
,
arguments
=
(
ctx
,
[
"
\n
"
]),
idx
=
0
,
**
kwargs
)
# completion = rf.greedy_until(ctx, ["\n"])
# completion = rf.greedy_until(ctx, ["\n"])
# return completion
# return completion
...
...
lm_eval/tasks/lambada.py
View file @
c4c20ff5
...
@@ -60,11 +60,18 @@ class LambadaBase(Task):
...
@@ -60,11 +60,18 @@ class LambadaBase(Task):
return
" "
+
doc
[
"text"
].
rsplit
(
" "
,
1
)[
1
]
return
" "
+
doc
[
"text"
].
rsplit
(
" "
,
1
)[
1
]
def
construct_requests
(
self
,
doc
,
ctx
,
**
kwargs
):
def
construct_requests
(
self
,
doc
,
ctx
,
**
kwargs
):
return
Instance
(
request_type
=
self
.
OUTPUT_TYPE
,
doc
=
doc
,
arguments
=
(
ctx
,
self
.
doc_to_target
(
doc
)),
**
kwargs
)
return
Instance
(
request_type
=
self
.
OUTPUT_TYPE
,
doc
=
doc
,
arguments
=
(
ctx
,
self
.
doc_to_target
(
doc
)),
**
kwargs
)
def
process_results
(
self
,
doc
,
results
):
def
process_results
(
self
,
doc
,
results
):
# TODO: this ^ is a hack. filters should make it so that we only have one response per request that we score
# TODO: this ^ is a hack. filters should make it so that we only have one response per request that we score
results
=
results
[
0
]
# TODO: recheck this. currently a list of [(ll, is_greedy)] is passed in
results
=
results
[
0
]
# TODO: recheck this. currently a list of [(ll, is_greedy)] is passed in
ll
,
is_greedy
=
results
ll
,
is_greedy
=
results
return
{
"ppl"
:
ll
,
"acc"
:
int
(
is_greedy
)}
return
{
"ppl"
:
ll
,
"acc"
:
int
(
is_greedy
)}
...
...
lm_eval/tasks/pile.py
View file @
c4c20ff5
lm_eval/tasks/pile_enron.yaml
View file @
c4c20ff5
lm_eval/tasks/super_glue/wsc/preprocess_wsc.py
View file @
c4c20ff5
import
re
import
re
def
doc_to_text
(
x
):
def
doc_to_text
(
x
):
def
_mark_span
(
text
,
span_str
,
span_idx
,
mark
):
def
_mark_span
(
text
,
span_str
,
span_idx
,
mark
):
pattern_tmpl
=
r
'
^((?:\S+\s){N})(W)
'
pattern_tmpl
=
r
"
^((?:\S+\s){N})(W)
"
pattern
=
re
.
sub
(
'N'
,
str
(
span_idx
),
pattern_tmpl
)
pattern
=
re
.
sub
(
"N"
,
str
(
span_idx
),
pattern_tmpl
)
pattern
=
re
.
sub
(
'W'
,
span_str
,
pattern
)
pattern
=
re
.
sub
(
"W"
,
span_str
,
pattern
)
return
re
.
sub
(
pattern
,
r
'
\1{0} \2 {0}
'
.
format
(
mark
),
text
)
return
re
.
sub
(
pattern
,
r
"
\1{0} \2 {0}
"
.
format
(
mark
),
text
)
text
=
x
[
'
text
'
]
text
=
x
[
"
text
"
]
text
=
_mark_span
(
text
,
x
[
'
span1_text
'
],
x
[
'
span1_index
'
],
'*'
)
text
=
_mark_span
(
text
,
x
[
"
span1_text
"
],
x
[
"
span1_index
"
],
"*"
)
# Compensate for 2 added "words" added in previous step.
# Compensate for 2 added "words" added in previous step.
span2_index
=
x
[
'
span2_index
'
]
+
2
*
(
x
[
'
span1_index
'
]
<
x
[
'
span2_index
'
])
span2_index
=
x
[
"
span2_index
"
]
+
2
*
(
x
[
"
span1_index
"
]
<
x
[
"
span2_index
"
])
text
=
_mark_span
(
text
,
x
[
'
span2_text
'
],
span2_index
,
'#'
)
text
=
_mark_span
(
text
,
x
[
"
span2_text
"
],
span2_index
,
"#"
)
return
text
return
text
lm_eval/tasks/wikitext.py
View file @
c4c20ff5
...
@@ -60,6 +60,7 @@ def wikitext_detokenizer(string):
...
@@ -60,6 +60,7 @@ def wikitext_detokenizer(string):
return
string
return
string
@
register_task
(
"wikitext"
)
@
register_task
(
"wikitext"
)
class
WikiText
(
PerplexityTask
):
class
WikiText
(
PerplexityTask
):
VERSION
=
"2.0"
VERSION
=
"2.0"
...
...
lm_eval/utils.py
View file @
c4c20ff5
...
@@ -150,7 +150,6 @@ class Reorderer:
...
@@ -150,7 +150,6 @@ class Reorderer:
return
res
return
res
def
make_table
(
result_dict
):
def
make_table
(
result_dict
):
"""Generate table of results."""
"""Generate table of results."""
from
pytablewriter
import
MarkdownTableWriter
,
LatexTableWriter
from
pytablewriter
import
MarkdownTableWriter
,
LatexTableWriter
...
@@ -262,7 +261,7 @@ def import_function(loader, node):
...
@@ -262,7 +261,7 @@ def import_function(loader, node):
function_name
=
loader
.
construct_scalar
(
node
)
function_name
=
loader
.
construct_scalar
(
node
)
yaml_path
=
os
.
path
.
dirname
(
loader
.
name
)
yaml_path
=
os
.
path
.
dirname
(
loader
.
name
)
module_name
,
function_name
=
function_name
.
split
(
'.'
)
module_name
,
function_name
=
function_name
.
split
(
"."
)
module_path
=
os
.
path
.
join
(
yaml_path
,
"{}.py"
.
format
(
module_name
))
module_path
=
os
.
path
.
join
(
yaml_path
,
"{}.py"
.
format
(
module_name
))
spec
=
importlib
.
util
.
spec_from_file_location
(
module_name
,
module_path
)
spec
=
importlib
.
util
.
spec_from_file_location
(
module_name
,
module_path
)
...
@@ -272,18 +271,19 @@ def import_function(loader, node):
...
@@ -272,18 +271,19 @@ def import_function(loader, node):
function
=
getattr
(
module
,
function_name
)
function
=
getattr
(
module
,
function_name
)
return
function
return
function
# Add the import_function constructor to the YAML loader
# Add the import_function constructor to the YAML loader
yaml
.
add_constructor
(
'
!function
'
,
import_function
)
yaml
.
add_constructor
(
"
!function
"
,
import_function
)
def
load_yaml_config
(
yaml_path
):
def
load_yaml_config
(
yaml_path
):
with
open
(
yaml_path
,
'
rb
'
)
as
file
:
with
open
(
yaml_path
,
"
rb
"
)
as
file
:
yaml_config
=
yaml
.
full_load
(
file
)
yaml_config
=
yaml
.
full_load
(
file
)
yaml_dir
=
os
.
path
.
dirname
(
yaml_path
)
yaml_dir
=
os
.
path
.
dirname
(
yaml_path
)
if
'
include
'
in
yaml_config
:
if
"
include
"
in
yaml_config
:
include_path
=
yaml_config
[
'
include
'
]
include_path
=
yaml_config
[
"
include
"
]
del
yaml_config
[
'
include
'
]
del
yaml_config
[
"
include
"
]
if
type
(
include_path
)
==
str
:
if
type
(
include_path
)
==
str
:
include_path
=
[
include_path
]
include_path
=
[
include_path
]
...
@@ -302,9 +302,9 @@ def load_yaml_config(yaml_path):
...
@@ -302,9 +302,9 @@ def load_yaml_config(yaml_path):
try
:
try
:
included_yaml_config
=
load_yaml_config
(
path
)
included_yaml_config
=
load_yaml_config
(
path
)
final_yaml_config
.
update
(
included_yaml_config
)
final_yaml_config
.
update
(
included_yaml_config
)
except
:
except
Exception
as
ex
:
# If failed to load, ignore
# If failed to load, ignore
pass
raise
ex
final_yaml_config
.
update
(
yaml_config
)
final_yaml_config
.
update
(
yaml_config
)
return
final_yaml_config
return
final_yaml_config
...
@@ -313,7 +313,7 @@ def load_yaml_config(yaml_path):
...
@@ -313,7 +313,7 @@ def load_yaml_config(yaml_path):
env
=
Environment
(
loader
=
BaseLoader
,
undefined
=
StrictUndefined
)
env
=
Environment
(
loader
=
BaseLoader
,
undefined
=
StrictUndefined
)
def
apply_template
(
template
,
doc
):
def
apply_template
(
template
,
doc
):
rtemplate
=
env
.
from_string
(
template
)
rtemplate
=
env
.
from_string
(
template
)
return
rtemplate
.
render
(
**
doc
)
return
rtemplate
.
render
(
**
doc
)
main.py
View file @
c4c20ff5
...
@@ -7,7 +7,8 @@ from lm_eval import evaluator, utils
...
@@ -7,7 +7,8 @@ from lm_eval import evaluator, utils
from
lm_eval.tasks
import
ALL_TASKS
from
lm_eval.tasks
import
ALL_TASKS
from
lm_eval.logger
import
eval_logger
from
lm_eval.logger
import
eval_logger
os
.
environ
[
'TOKENIZERS_PARALLELISM'
]
=
'false'
os
.
environ
[
"TOKENIZERS_PARALLELISM"
]
=
"false"
class
MultiChoice
:
class
MultiChoice
:
def
__init__
(
self
,
choices
):
def
__init__
(
self
,
choices
):
...
@@ -65,9 +66,10 @@ def main():
...
@@ -65,9 +66,10 @@ def main():
"REAL METRICS SHOULD NOT BE COMPUTED USING LIMIT."
"REAL METRICS SHOULD NOT BE COMPUTED USING LIMIT."
)
)
if
args
.
tasks
!=
None
:
if
args
.
tasks
is
not
None
:
if
os
.
path
.
isdir
(
args
.
tasks
):
if
os
.
path
.
isdir
(
args
.
tasks
):
import
glob
import
glob
task_names
=
[]
task_names
=
[]
yaml_path
=
os
.
path
.
join
(
args
.
tasks
,
"*.yaml"
)
yaml_path
=
os
.
path
.
join
(
args
.
tasks
,
"*.yaml"
)
for
yaml_file
in
glob
.
glob
(
yaml_path
):
for
yaml_file
in
glob
.
glob
(
yaml_path
):
...
...
setup.py
View file @
c4c20ff5
...
@@ -42,6 +42,6 @@ setuptools.setup(
...
@@ -42,6 +42,6 @@ setuptools.setup(
extras_require
=
{
extras_require
=
{
"dev"
:
[
"black"
,
"flake8"
,
"pre-commit"
,
"pytest"
,
"pytest-cov"
],
"dev"
:
[
"black"
,
"flake8"
,
"pre-commit"
,
"pytest"
,
"pytest-cov"
],
"multilingual"
:
[
"nagisa>=0.2.7"
,
"jieba>=0.42.1"
],
"multilingual"
:
[
"nagisa>=0.2.7"
,
"jieba>=0.42.1"
],
"sentencepiece"
:
[
"sentencepiece>=0.1.98"
,
"protobuf>=4.22.1"
]
"sentencepiece"
:
[
"sentencepiece>=0.1.98"
,
"protobuf>=4.22.1"
]
,
},
},
)
)
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment