Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
lm-evaluation-harness
Commits
1cd4ec01
Unverified
Commit
1cd4ec01
authored
Apr 27, 2022
by
Stella Biderman
Committed by
GitHub
Apr 27, 2022
Browse files
Merge pull request #8 from dirkgr/PytestUpdate
Pytest update
parents
33f5572a
38a240ce
Changes
25
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
14 additions
and
16 deletions
+14
-16
lm_eval/tasks/hendrycks_ethics.py
lm_eval/tasks/hendrycks_ethics.py
+4
-6
lm_eval/tasks/hendrycks_math.py
lm_eval/tasks/hendrycks_math.py
+3
-3
lm_eval/tasks/wikitext.py
lm_eval/tasks/wikitext.py
+4
-4
lm_eval/tasks/wsc273.py
lm_eval/tasks/wsc273.py
+2
-2
setup.py
setup.py
+1
-1
No files found.
lm_eval/tasks/hendrycks_ethics.py
View file @
1cd4ec01
...
...
@@ -277,20 +277,18 @@ class EthicsUtilitarianism(Ethics):
DATASET_NAME
=
"utilitarianism"
def
training_docs
(
self
):
rnd
=
random
.
Random
()
for
doc
in
self
.
dataset
[
"train"
]:
yield
self
.
_process_doc
(
doc
,
rnd
)
yield
self
.
_process_doc
(
doc
)
def
validation_docs
(
self
):
raise
NotImplementedError
def
test_docs
(
self
):
rnd
=
random
.
Random
()
for
doc
in
self
.
dataset
[
"test"
]:
yield
self
.
_process_doc
(
doc
,
rnd
)
yield
self
.
_process_doc
(
doc
)
def
_process_doc
(
self
,
doc
,
rnd
):
rnd
.
seed
(
doc
[
"activity"
])
def
_process_doc
(
self
,
doc
):
rnd
=
random
.
Random
(
doc
[
"activity"
])
scenarios
=
[
doc
[
"activity"
],
doc
[
"baseline"
]]
ordering
=
[
0
,
1
]
rnd
.
shuffle
(
ordering
)
...
...
lm_eval/tasks/hendrycks_math.py
View file @
1cd4ec01
...
...
@@ -38,15 +38,15 @@ class Math(Task):
return
True
def
training_docs
(
self
):
return
map
(
self
.
_
load
_doc
,
self
.
dataset
[
"train"
])
return
map
(
self
.
_
process
_doc
,
self
.
dataset
[
"train"
])
def
validation_docs
(
self
):
return
NotImplemented
def
test_docs
(
self
):
return
map
(
self
.
_
load
_doc
,
self
.
dataset
[
"test"
])
return
map
(
self
.
_
process
_doc
,
self
.
dataset
[
"test"
])
def
_
load
_doc
(
self
,
doc
):
def
_
process
_doc
(
self
,
doc
):
doc
[
"answer"
]
=
self
.
remove_boxed
(
self
.
last_boxed_only_string
(
doc
[
"solution"
]))
return
doc
...
...
lm_eval/tasks/wikitext.py
View file @
1cd4ec01
...
...
@@ -76,15 +76,15 @@ class WikiText(PerplexityTask):
return
True
def
training_docs
(
self
):
return
map
(
self
.
_
load
_doc
,
self
.
dataset
[
"train"
])
return
map
(
self
.
_
process
_doc
,
self
.
dataset
[
"train"
])
def
validation_docs
(
self
):
return
map
(
self
.
_
load
_doc
,
self
.
dataset
[
"validation"
])
return
map
(
self
.
_
process
_doc
,
self
.
dataset
[
"validation"
])
def
test_docs
(
self
):
return
map
(
self
.
_
load
_doc
,
self
.
dataset
[
"test"
])
return
map
(
self
.
_
process
_doc
,
self
.
dataset
[
"test"
])
def
_
load
_doc
(
self
,
doc
):
def
_
process
_doc
(
self
,
doc
):
return
doc
[
"page"
]
def
doc_to_target
(
self
,
doc
):
...
...
lm_eval/tasks/wsc273.py
View file @
1cd4ec01
...
...
@@ -53,9 +53,9 @@ class WinogradSchemaChallenge273(Task):
return
True
def
test_docs
(
self
):
return
map
(
self
.
_
load
_doc
,
self
.
dataset
[
"test"
])
return
map
(
self
.
_
process
_doc
,
self
.
dataset
[
"test"
])
def
_
load
_doc
(
self
,
doc
):
def
_
process
_doc
(
self
,
doc
):
# The HF implementation of `wsc273` is not `partial evaluation` friendly.
doc
[
"text"
]
=
doc
[
"text"
].
replace
(
" "
,
" "
)
doc
[
"options"
][
0
]
=
self
.
__normalize_option
(
doc
,
doc
[
"options"
][
0
])
...
...
setup.py
View file @
1cd4ec01
...
...
@@ -37,7 +37,6 @@ setuptools.setup(
"pycountry==20.7.3"
,
"numexpr==2.7.2"
,
"lm_dataformat==0.0.20"
,
"pytest==6.2.3"
,
"pybind11==2.6.2"
,
"tqdm-multiprocess==0.0.11"
,
"zstandard==0.15.2"
,
...
...
@@ -51,4 +50,5 @@ setuptools.setup(
dependency_links
=
[
"https://github.com/google-research/bleurt/archive/b610120347ef22b494b6d69b4316e303f5932516.zip#egg=bleurt"
,
],
extras_require
=
{
'dev'
:
[
'pytest'
,
'black'
]}
)
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment