lm-evaluation-harness, commit d27c0c08 (unverified)
Authored Feb 26, 2024 by LSinev; committed via GitHub, Feb 26, 2024
Parent commit: f78e2da4

Commit message: Apply code autoformatting with Ruff to tasks/*.py and *__init__.py (#1469)
Showing 8 of 48 changed files, with 28 additions and 29 deletions (+28, -29). The commit's diff is paginated across 3 pages; this is the first.
Changed files on this page:

    lm_eval/tasks/scrolls/task.py                +8  -7
    lm_eval/tasks/squadv2/task.py                +6  -6
    lm_eval/tasks/super_glue/cb/aggregate.py     +1  -1
    lm_eval/tasks/super_glue/record/t5_utils.py  +2  -2
    lm_eval/tasks/super_glue/wsc/t5_utils.py     +6  -5
    lm_eval/tasks/truthfulqa/utils.py            +1  -2
    lm_eval/tasks/xwinograd/utils.py             +3  -1
    pyproject.toml                               +1  -5
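All of the Python changes below are the same operation: Ruff's import-sorting rules (the "I" selector configured in pyproject.toml, with known-first-party = ["lm_eval"]) rewrite each file's imports into three groups, standard library, third-party, and first-party, separated by blank lines and alphabetized within each group. A minimal sketch of the target layout, using imports that appear in the diffs below:

    # 1. Standard library ("import x" and "from x import y" sort together).
    import re
    from abc import abstractmethod
    from functools import reduce

    # 2. Third-party packages.
    import numpy as np
    from datasets import load_metric

    # 3. First-party code, identified by known-first-party = ["lm_eval"].
    from lm_eval.api.task import Task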
lm_eval/tasks/scrolls/task.py (View file @ d27c0c08)

Imports before:

    import re
    import numpy as np
    import transformers.data.metrics.squad_metrics as squad_metrics
    from abc import abstractmethod
    from datasets import load_metric
    from transformers import AutoTokenizer
    from functools import reduce
    from lm_eval.api.task import Task
    from lm_eval.api.metrics import mean
    from lm_eval.api.instance import Instance
    from lm_eval.api.registry import register_task

Imports after (standard library, third-party, then first-party, each group alphabetized):

    import re
    from abc import abstractmethod
    from functools import reduce

    import numpy as np
    import transformers.data.metrics.squad_metrics as squad_metrics
    from datasets import load_metric
    from transformers import AutoTokenizer

    from lm_eval.api.instance import Instance
    from lm_eval.api.metrics import mean
    from lm_eval.api.registry import register_task
    from lm_eval.api.task import Task

Trailing context:

    _CITATION = """
    @inproceedings{shaham-etal-2022-scrolls,
    ...

@@ -44,6 +44,7 @@ _CITATION = """

Inside _download_metric(), a blank line now separates the local standard-library imports from the huggingface_hub import:

 def _download_metric():
     import os
     import shutil
+
     from huggingface_hub import hf_hub_download

     scrolls_metric_path = hf_hub_download(
...

@@ -148,7 +149,7 @@ class _SCROLLSTask(Task):

The tautological double condition on PRUNE_TOKENIZERS is collapsed into a single check:

         del self.dataset["test"]
         for split in self.dataset:
             self.dataset[split] = _drop_duplicates_in_input(self.dataset[split])
-        if self.PRUNE_TOKENIZERS is not None and self.PRUNE_TOKENIZERS is not None:
+        if self.PRUNE_TOKENIZERS is not None:
             self.prune()

     def _get_prune_text(self, sample):
...
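The context lines above call _drop_duplicates_in_input, whose body is not part of this diff. Purely for orientation, a hypothetical sketch of such a helper, keyed on the SCROLLS "input" column (the repository's actual implementation may differ):

    from datasets import Dataset

    def _drop_duplicates_in_input(split: Dataset) -> Dataset:
        # Hypothetical: keep only the first row for each distinct "input" value.
        seen = set()
        keep = []
        for i, inp in enumerate(split["input"]):
            if inp not in seen:
                seen.add(inp)
                keep.append(i)
        return split.select(keep)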
lm_eval/tasks/squadv2/task.py (View file @ d27c0c08)

@@ -13,14 +13,15 @@ also determine when no answer is supported by the paragraph and abstain from ans

Context (end of module docstring):

    Homepage: https://rajpurkar.github.io/SQuAD-explorer/
    """

Imports before:

    import datasets
    from math import exp
    from functools import partial
    from packaging import version
    from lm_eval.api.task import ConfigurableTask
    from lm_eval.api.instance import Instance

Imports after:

    from functools import partial
    from math import exp

    import datasets
    from packaging import version

    from lm_eval.api.instance import Instance
    from lm_eval.api.task import ConfigurableTask

Trailing context:

    _CITATION = """
    @misc{rajpurkar2018know,
    ...

@@ -35,7 +36,6 @@ _CITATION = """

A stale commented-out line is removed from _squad_metric():

 def _squad_metric(predictions, references):
-    # squad_metric = load("squad_v2")
     squad_metric = datasets.load_metric("squad_v2")
     return squad_metric.compute(predictions=predictions, references=references)
...

@@ -52,7 +52,7 @@ class SQuAD2(ConfigurableTask):

Single quotes become double quotes in __init__:

     DATASET_NAME = None

     def __init__(self):
-        super().__init__(config={'metadata': {'version': self.VERSION}})
+        super().__init__(config={"metadata": {"version": self.VERSION}})
         # HF changed squad on us so we have to make sure we aren't running the old one
         assert version.parse(datasets.__version__) >= version.parse(
...
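For context on _squad_metric: the squad_v2 metric from the datasets library scores both answer extraction and abstention, and expects prediction and reference dicts keyed by example id. A minimal usage sketch; the field names follow the Hugging Face squad_v2 metric, while the id and values are hypothetical:

    predictions = [
        {
            "id": "56ddde6b9a695914005b9628",   # hypothetical example id
            "prediction_text": "Normandy",
            "no_answer_probability": 0.1,       # squad_v2 also scores abstention
        }
    ]
    references = [
        {
            "id": "56ddde6b9a695914005b9628",
            "answers": {"text": ["Normandy"], "answer_start": [159]},
        }
    ]
    results = _squad_metric(predictions=predictions, references=references)
    print(results["exact"], results["f1"])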
lm_eval/tasks/super_glue/cb/aggregate.py (View file @ d27c0c08)

The two imports are swapped into alphabetical order:

-import sklearn
 import numpy as np
+import sklearn

 def cb_multi_fi(items):
...
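The body of cb_multi_fi is elided above. CommitmentBank is a three-class task scored with multi-class F1; a sketch consistent with the imports (numpy, sklearn) and the (prediction, gold) pair convention used by harness aggregations, though the actual aggregation in the repository may differ:

    import numpy as np
    import sklearn.metrics

    def cb_multi_fi(items):
        # items: iterable of (prediction, gold) label pairs over the 3 CB classes.
        preds, golds = zip(*items)
        preds, golds = np.asarray(preds), np.asarray(golds)
        # One-vs-rest F1 per class, then the unweighted mean across classes.
        f1s = [
            sklearn.metrics.f1_score(y_true=golds == c, y_pred=preds == c)
            for c in (0, 1, 2)
        ]
        return float(np.mean(f1s))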
lm_eval/tasks/super_glue/record/t5_utils.py (View file @ d27c0c08)

import collections moves to the head of the standard-library group.

Before:

    import re
    import string
    import collections

After:

    import collections
    import re
    import string

The remaining imports are unchanged context:

    import numpy as np
    from datasets import Dataset

    from lm_eval.api.metrics import metric_max_over_ground_truths
...
lm_eval/tasks/super_glue/wsc/t5_utils.py (View file @ d27c0c08)

The code shown is identical in both panes, so the changes here are formatting only (blank-line and spacing normalization):

    import re
    from typing import List

    def doc_to_text(x):
        text = re.sub(r" X ", " *" + x["span2_text"] + "* ", _wsc_inputs(x))
        return "wsc: " + text
...

@@ -23,7 +24,7 @@ def _wsc_inputs(x):

    [
        " ".join(words[:pronoun_index]),
        "X",
        " ".join(words[pronoun_index + 1 :]),
    ]
    )
...
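To make the substitution in doc_to_text concrete: _wsc_inputs(x) (partially shown above) rebuilds the passage with the pronoun replaced by a bare "X", which re.sub then wraps in asterisks. A standalone illustration with a hypothetical sentence:

    import re

    # _wsc_inputs would produce a passage like this, with " X " marking the pronoun.
    sentence = "The trophy does not fit in the suitcase because X is too big ."
    span2_text = "it"  # x["span2_text"], the pronoun under test

    text = re.sub(r" X ", " *" + span2_text + "* ", sentence)
    print("wsc: " + text)
    # wsc: The trophy does not fit in the suitcase because *it* is too big .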
lm_eval/tasks/truthfulqa/utils.py (View file @ d27c0c08)

import sacrebleu moves into alphabetical order within the third-party group.

Before:

    import datasets
    import sacrebleu
    import numpy as np
    from rouge_score import rouge_scorer, scoring

After:

    import datasets
    import numpy as np
    import sacrebleu
    from rouge_score import rouge_scorer, scoring
...
lm_eval/tasks/xwinograd/utils.py (View file @ d27c0c08)

@@ -51,7 +51,9 @@ def gen_lang_yamls(output_dir: str, overwrite: bool) -> None:

Consistent with the +3/-1 count for this file, the long open(...) call appears to have been re-wrapped by the formatter (one line becomes three):

     for lang in LANGUAGES:
         file_name = f"xwinograd_{lang}.yaml"
         try:
-            with open(f"{output_dir}/{file_name}", "w" if overwrite else "x", encoding="utf-8") as f:
+            with open(
+                f"{output_dir}/{file_name}", "w" if overwrite else "x", encoding="utf-8"
+            ) as f:
                 f.write("# Generated by utils.py\n")
                 yaml.dump(
                     {
...
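The "w" if overwrite else "x" mode is what makes the enclosing try block necessary: mode "x" is exclusive creation and raises FileExistsError when the YAML already exists. A minimal sketch of the behavior; the path is illustrative:

    import os

    path = "/tmp/xwinograd_en.yaml"  # illustrative output path
    overwrite = False

    try:
        with open(path, "w" if overwrite else "x", encoding="utf-8") as f:
            f.write("# Generated by utils.py\n")
    except FileExistsError:
        # Without overwrite, an existing file is left untouched.
        print(f"{os.path.basename(path)} already exists; skipping")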
pyproject.toml (View file @ d27c0c08)

@@ -90,9 +90,6 @@ all = [

The tasks directory is no longer excluded from Ruff:

     "lm_eval[wandb]",
 ]

-[tool.ruff]
-extend-exclude = ["lm_eval/tasks/*.py"]
-
 [tool.ruff.lint]
 extend-select = ["I"]
...

@@ -101,5 +98,4 @@ lines-after-imports = 2

Import sorting ("I") is no longer ignored for __init__.py, and the E721 ignore for the tasks directory is dropped:

 known-first-party = ["lm_eval"]

 [tool.ruff.extend-per-file-ignores]
-"__init__.py" = ["F401","F402","F403","I"]
-"lm_eval/tasks/*" = ["E721"]
+"__init__.py" = ["F401","F402","F403"]