gaoqiong / lm-evaluation-harness - Commits

Commit d27c0c08 (unverified)
Authored Feb 26, 2024 by LSinev; committed via GitHub on Feb 26, 2024
Apply code autoformatting with Ruff to tasks/*.py and *__init__.py (#1469)
Parent commit: f78e2da4
Changes: 48 files in the full commit
Showing 8 changed files on this page, with 28 additions and 29 deletions (+28 -29).
lm_eval/tasks/scrolls/task.py               +8 -7
lm_eval/tasks/squadv2/task.py               +6 -6
lm_eval/tasks/super_glue/cb/aggregate.py    +1 -1
lm_eval/tasks/super_glue/record/t5_utils.py +2 -2
lm_eval/tasks/super_glue/wsc/t5_utils.py    +6 -5
lm_eval/tasks/truthfulqa/utils.py           +1 -2
lm_eval/tasks/xwinograd/utils.py            +3 -1
pyproject.toml                              +1 -5
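
Most of the per-file changes below are mechanical: Ruff's import-sorting rule ("I"), configured in pyproject.toml further down with known-first-party = ["lm_eval"] and lines-after-imports = 2, regroups each module's imports into standard-library, third-party, and first-party blocks, puts plain import statements before from-imports within a block, and alphabetizes each group. A minimal sketch of the layout it converges on for a task module (the module and function names here are illustrative, not taken from this commit):

    # Standard-library group first: plain imports, then from-imports, alphabetized.
    import re
    from functools import partial

    # Third-party group next, after one blank line.
    import datasets
    import numpy as np

    # First-party group last ("lm_eval" is declared via known-first-party).
    from lm_eval.api.instance import Instance
    from lm_eval.api.task import ConfigurableTask


    # lines-after-imports = 2 keeps two blank lines between the imports and the first definition.
    def doc_to_text(doc):
        return doc["question"]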
lm_eval/tasks/scrolls/task.py (view file @ d27c0c08)

 import re
+from abc import abstractmethod
+from functools import reduce
+
 import numpy as np
 import transformers.data.metrics.squad_metrics as squad_metrics
-
-from abc import abstractmethod
 from datasets import load_metric
 from transformers import AutoTokenizer
-from functools import reduce
 
-from lm_eval.api.task import Task
-from lm_eval.api.metrics import mean
 from lm_eval.api.instance import Instance
+from lm_eval.api.metrics import mean
 from lm_eval.api.registry import register_task
+from lm_eval.api.task import Task
 
 _CITATION = """
 @inproceedings{shaham-etal-2022-scrolls,
...

@@ -44,6 +44,7 @@ _CITATION = """
 def _download_metric():
     import os
     import shutil
+
     from huggingface_hub import hf_hub_download
 
     scrolls_metric_path = hf_hub_download(
...

@@ -148,7 +149,7 @@ class _SCROLLSTask(Task):
         del self.dataset["test"]
         for split in self.dataset:
             self.dataset[split] = _drop_duplicates_in_input(self.dataset[split])
-        if self.PRUNE_TOKENIZERS is not None and self.PRUNE_TOKENIZERS is not None:
+        if self.PRUNE_TOKENIZERS is not None:
             self.prune()
 
     def _get_prune_text(self, sample):
...
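
The _download_metric helper above fetches the SCROLLS metric implementation from the Hugging Face Hub at run time; its actual arguments are elided in this view. For orientation, hf_hub_download downloads a single file from a Hub repository and returns the local cached path. A minimal sketch with placeholder arguments (not the values used by _download_metric):

    from huggingface_hub import hf_hub_download

    # repo_id and filename are placeholders for illustration only;
    # the real call's arguments are not shown in this diff.
    local_path = hf_hub_download(
        repo_id="some-org/some-dataset",
        repo_type="dataset",
        filename="metrics/some_metric.py",
    )
    print(local_path)  # path inside the local Hub cache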
lm_eval/tasks/squadv2/task.py (view file @ d27c0c08)
...

@@ -13,14 +13,15 @@ also determine when no answer is supported by the paragraph and abstain from ans
 Homepage: https://rajpurkar.github.io/SQuAD-explorer/
 """
-import datasets
-from math import exp
 from functools import partial
+from math import exp
 
+import datasets
 from packaging import version
 
-from lm_eval.api.task import ConfigurableTask
 from lm_eval.api.instance import Instance
+from lm_eval.api.task import ConfigurableTask
 
 _CITATION = """
 @misc{rajpurkar2018know,
...

@@ -35,7 +36,6 @@ _CITATION = """
 def _squad_metric(predictions, references):
     # squad_metric = load("squad_v2")
     squad_metric = datasets.load_metric("squad_v2")
     return squad_metric.compute(predictions=predictions, references=references)
...

@@ -52,7 +52,7 @@ class SQuAD2(ConfigurableTask):
     DATASET_NAME = None
 
     def __init__(self):
-        super().__init__(config={'metadata': {'version': self.VERSION}})
+        super().__init__(config={"metadata": {"version": self.VERSION}})
         # HF changed squad on us so we have to make sure we aren't running the old one
         assert version.parse(datasets.__version__) >= version.parse(
...
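
An aside on the _squad_metric hunk above: datasets.load_metric is deprecated upstream, and the commented-out "# squad_metric = load(\"squad_v2\")" line appears to reference the standalone evaluate package that replaced it. This is not part of the commit, but a rough sketch of the equivalent call, assuming evaluate is installed (the example prediction/reference records are illustrative):

    import evaluate

    # Alternative to datasets.load_metric("squad_v2"); same compute() interface.
    squad_metric = evaluate.load("squad_v2")

    results = squad_metric.compute(
        predictions=[{"id": "1", "prediction_text": "Paris", "no_answer_probability": 0.0}],
        references=[{"id": "1", "answers": {"text": ["Paris"], "answer_start": [0]}}],
    )
    print(results["exact"], results["f1"])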
lm_eval/tasks/super_glue/cb/aggregate.py (view file @ d27c0c08)

-import sklearn
 import numpy as np
+import sklearn
 
 
 def cb_multi_fi(items):
...
lm_eval/tasks/super_glue/record/t5_utils.py (view file @ d27c0c08)

+import collections
 import re
 import string
-import collections
-import numpy as np
 
+import numpy as np
 from datasets import Dataset
 
 from lm_eval.api.metrics import metric_max_over_ground_truths
...
lm_eval/tasks/super_glue/wsc/t5_utils.py (view file @ d27c0c08)

 import re
 from typing import List
 
 
 def doc_to_text(x):
     text = re.sub(r" X ", " *" + x["span2_text"] + "* ", _wsc_inputs(x))
     return "wsc: " + text
...

@@ -23,7 +24,7 @@ def _wsc_inputs(x):
             [
                 " ".join(words[:pronoun_index]),
                 "X",
-                " ".join(words[pronoun_index + 1:]),
+                " ".join(words[pronoun_index + 1 :]),
             ]
         )
...
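
For context, doc_to_text above follows the T5-style WSC preprocessing: _wsc_inputs splices a bare "X" placeholder into the passage at the pronoun position, and doc_to_text then highlights the pronoun with asterisks and prepends the task prefix. A self-contained sketch of that two-step behaviour (the function name and the classic Winograd sentence are illustrative, not the module's actual code path):

    import re


    def wsc_sketch(passage_words, pronoun_index, pronoun_text):
        # Step 1 (what _wsc_inputs does): replace the pronoun with a bare "X" placeholder.
        with_placeholder = " ".join(
            [" ".join(passage_words[:pronoun_index]), "X", " ".join(passage_words[pronoun_index + 1 :])]
        )
        # Step 2 (what doc_to_text does): highlight the pronoun and add the "wsc:" prefix.
        return "wsc: " + re.sub(r" X ", " *" + pronoun_text + "* ", with_placeholder)


    words = "The city councilmen refused the demonstrators a permit because they feared violence .".split()
    print(wsc_sketch(words, 9, "they"))
    # -> wsc: The city councilmen refused the demonstrators a permit because *they* feared violence .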
lm_eval/tasks/truthfulqa/utils.py (view file @ d27c0c08)

 import datasets
-import sacrebleu
 import numpy as np
+import sacrebleu
 from rouge_score import rouge_scorer, scoring
...
lm_eval/tasks/xwinograd/utils.py (view file @ d27c0c08)
...

@@ -51,7 +51,9 @@ def gen_lang_yamls(output_dir: str, overwrite: bool) -> None:
     for lang in LANGUAGES:
         file_name = f"xwinograd_{lang}.yaml"
         try:
-            with open(f"{output_dir}/{file_name}", "w" if overwrite else "x", encoding="utf-8") as f:
+            with open(
+                f"{output_dir}/{file_name}", "w" if overwrite else "x", encoding="utf-8"
+            ) as f:
                 f.write("# Generated by utils.py\n")
                 yaml.dump(
                     {
...
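
The only change in this hunk is the reflowed open(...) call; the "w" if overwrite else "x" mode it preserves is what makes the overwrite argument opt-in, since mode "x" is Python's exclusive-creation mode and raises FileExistsError when the YAML already exists. A small sketch of that behaviour (the file path is illustrative):

    from pathlib import Path

    path = Path("/tmp/xwinograd_en.yaml")
    path.write_text("# existing file\n", encoding="utf-8")

    overwrite = False
    try:
        # Mirrors the gen_lang_yamls() call: "w" truncates, "x" refuses to clobber.
        with open(path, "w" if overwrite else "x", encoding="utf-8") as f:
            f.write("# Generated by utils.py\n")
    except FileExistsError:
        print(f"{path} already exists; rerun with overwrite=True to regenerate it.")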
pyproject.toml (view file @ d27c0c08)
...

@@ -90,9 +90,6 @@ all = [
     "lm_eval[wandb]",
 ]
 
-[tool.ruff]
-extend-exclude = ["lm_eval/tasks/*.py"]
-
 [tool.ruff.lint]
 extend-select = ["I"]
...

@@ -101,5 +98,4 @@ lines-after-imports = 2
 known-first-party = ["lm_eval"]
 
 [tool.ruff.extend-per-file-ignores]
-"__init__.py" = ["F401","F402","F403","I"]
-"lm_eval/tasks/*" = ["E721"]
+"__init__.py" = ["F401","F402","F403"]
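
Per this reconstruction, the dropped "lm_eval/tasks/*" = ["E721"] entry had been suppressing pycodestyle's E721 ("do not compare types") in task code, and the "I" entry removed from the __init__.py ignores is what lets import sorting apply to package __init__ files. For reference, a minimal sketch of the pattern E721 flags and the usual rewrites (example values are illustrative):

    value = 3.0

    # E721: comparing types with == (the pattern the per-file ignore used to allow in tasks).
    if type(value) == float:
        print("float (flagged by E721)")

    # Preferred forms: isinstance() for instance checks, `is` for exact type checks.
    if isinstance(value, float):
        print("float (ok)")
    if type(value) is float:
        print("exactly float (ok)")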