gaoqiong / lm-evaluation-harness

Commit fe4a1efd, authored Feb 01, 2021 by Leo Gao
Fix linting problems
Parent: 7ab3f8ca

Showing 18 changed files with 25 additions and 21 deletions (+25, -21)
lm_eval/models/dummy.py       +1 -1
lm_eval/models/gpt2.py        +1 -1
lm_eval/tasks/arc.py          +1 -1
lm_eval/tasks/drop.py         +1 -1
lm_eval/tasks/lambada.py      +1 -1
lm_eval/tasks/naturalqs.py    +1 -1
lm_eval/tasks/openbookqa.py   +1 -1
lm_eval/tasks/piqa.py         +1 -1
lm_eval/tasks/quac.py         +1 -1
lm_eval/tasks/race.py         +2 -1
lm_eval/tasks/squad.py        +1 -1
lm_eval/tasks/storycloze.py   +1 -1
lm_eval/tasks/triviaqa.py     +1 -1
lm_eval/tasks/webqs.py        +1 -1
lm_eval/tasks/wikitext.py     +1 -1
lm_eval/tasks/winogrande.py   +1 -1
lm_eval/utils.py              +1 -1
lm_eval/utils_stream.py       +7 -4

Most of these files change only their final line: the previous versions ended without a trailing newline (note the "\ No newline at end of file" markers below), and this commit adds one.
lm_eval/models/dummy.py
@@ -20,4 +20,4 @@ class DummyLM(LM):
     def greedy_until(self, requests):
         # TODO: implement
-        pass
\ No newline at end of file
+        pass
lm_eval/models/gpt2.py
@@ -43,4 +43,4 @@ class GPT2LM(LM):
     def greedy_until(self, requests):
         # TODO: implement
-        pass
\ No newline at end of file
+        pass
lm_eval/tasks/arc.py
@@ -70,4 +70,4 @@ class ARCEasy(HFTask):
 class ARCChallenge(ARCEasy):
     DATASET_PATH = "ai2_arc"
-    DATASET_NAME = "ARC-Challenge"
\ No newline at end of file
+    DATASET_NAME = "ARC-Challenge"
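The ARCChallenge hunk above also shows how tasks in this harness are configured: a subclass of HFTask overrides only the DATASET_PATH and DATASET_NAME class attributes to point at a different Hugging Face dataset configuration. Below is a minimal sketch of the same pattern; the class name, dataset identifiers, and the import path for HFTask are illustrative assumptions, not part of this commit.

# Hypothetical task definition following the pattern in lm_eval/tasks/arc.py.
# The import path and dataset identifiers are assumptions for illustration only.
from lm_eval.base import HFTask

class MyMultipleChoiceTask(HFTask):
    # Hugging Face dataset repository and configuration to load.
    DATASET_PATH = "some_dataset"    # placeholder dataset path
    DATASET_NAME = "some-config"     # placeholder configuration name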
lm_eval/tasks/drop.py
@@ -104,4 +104,4 @@ class DROP(Dataset):
             whether a higher value of the submetric is better
         """
         # TODO: implement evaluation.
-        raise NotImplementedError('Evaluation not implemented')
\ No newline at end of file
+        raise NotImplementedError('Evaluation not implemented')
lm_eval/tasks/lambada.py
@@ -67,4 +67,4 @@ class LAMBADA(Dataset):
         return {
             'perplexity': False,
             'accuracy': True
-        }
\ No newline at end of file
+        }
lm_eval/tasks/naturalqs.py
@@ -87,4 +87,4 @@ class NaturalQs(HFTask):
             whether a higher value of the submetric is better
         """
         # TODO: implement evaluation.
-        raise NotImplementedError('Evaluation not implemented')
\ No newline at end of file
+        raise NotImplementedError('Evaluation not implemented')
lm_eval/tasks/openbookqa.py
@@ -95,4 +95,4 @@ class OpenBookQA(HFTask):
             whether a higher value of the submetric is better
         """
         # TODO: implement evaluation.
-        raise NotImplementedError('Evaluation not implemented')
\ No newline at end of file
+        raise NotImplementedError('Evaluation not implemented')
lm_eval/tasks/piqa.py
@@ -74,4 +74,4 @@ class PiQA(Dataset):
     def higher_is_better(self):
         return {
             'acc': True
-        }
\ No newline at end of file
+        }
lm_eval/tasks/quac.py
@@ -103,4 +103,4 @@ class QuAC(Dataset):
             whether a higher value of the submetric is better
         """
         # TODO: implement evaluation.
-        raise NotImplementedError('Evaluation not implemented')
\ No newline at end of file
+        raise NotImplementedError('Evaluation not implemented')
lm_eval/tasks/race.py
@@ -23,7 +23,8 @@ class RACE(HFTask):
         return True

     def _collate_data(self, set):
         if set in self.cache: return self.cache[set]
         # One big issue with HF's implementation of this dataset: it makes a
         # separate document for each question; meanwhile, in the GPT3 paper it
         # is shown that one document is made per passage.
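The comment in this hunk describes the mismatch that _collate_data works around: Hugging Face's race dataset yields one example per question, whereas the GPT-3 setup builds one document per passage that carries all of that passage's questions. A rough sketch of that regrouping follows; it assumes the standard race fields (article, question, options, answer) and illustrates the idea in the comment, not the actual _collate_data implementation.

# Sketch: regroup per-question RACE examples into one document per passage.
# Assumes the usual Hugging Face "race" fields: article, question, options, answer.
from collections import defaultdict

def collate_by_passage(examples):
    grouped = defaultdict(list)
    for ex in examples:
        # Questions that share an article end up under the same passage key.
        grouped[ex["article"]].append({
            "question": ex["question"],
            "options": ex["options"],
            "answer": ex["answer"],
        })
    # One document per passage, carrying its full list of questions.
    return [
        {"article": article, "problems": problems}
        for article, problems in grouped.items()
    ]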
lm_eval/tasks/squad.py
@@ -83,4 +83,4 @@ class SQuAD(HFTask):
             whether a higher value of the submetric is better
         """
         # TODO: implement evaluation.
-        raise NotImplementedError('Evaluation not implemented')
\ No newline at end of file
+        raise NotImplementedError('Evaluation not implemented')
lm_eval/tasks/storycloze.py
@@ -89,4 +89,4 @@ class StoryCloze(Dataset):
             whether a higher value of the submetric is better
         """
         # TODO: implement evaluation.
-        raise NotImplementedError('Evaluation not implemented')
\ No newline at end of file
+        raise NotImplementedError('Evaluation not implemented')
lm_eval/tasks/triviaqa.py
@@ -74,4 +74,4 @@ class TriviaQA(Dataset):
     def higher_is_better(self):
         return {
             "acc": True
-        }
\ No newline at end of file
+        }
lm_eval/tasks/webqs.py
@@ -70,4 +70,4 @@ class WebQs(HFTask):
             whether a higher value of the submetric is better
         """
         # TODO: implement evaluation.
-        raise NotImplementedError('Evaluation not implemented')
\ No newline at end of file
+        raise NotImplementedError('Evaluation not implemented')
lm_eval/tasks/wikitext.py
@@ -121,4 +121,4 @@ class WikiText2(NLP_TASK):
             whether a higher value of the submetric is better
         """
         # TODO: implement evaluation.
-        raise NotImplementedError('Evaluation not implemented')
\ No newline at end of file
+        raise NotImplementedError('Evaluation not implemented')
lm_eval/tasks/winogrande.py
@@ -90,4 +90,4 @@ class Winogrande(HFTask):
             whether a higher value of the submetric is better
         """
         # TODO: implement evaluation.
-        raise NotImplementedError('Evaluation not implemented')
\ No newline at end of file
+        raise NotImplementedError('Evaluation not implemented')
lm_eval/utils.py
@@ -28,4 +28,4 @@ def simple_parse_args_string(args_string):
 def join_iters(iters):
     for iter in iters:
-        yield from iter
\ No newline at end of file
+        yield from iter
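For context, join_iters is a small generator that flattens a sequence of iterables into a single stream; the commit only adds the file's missing final newline. A quick usage sketch with made-up inputs:

# join_iters yields every element of every iterable, in order.
from lm_eval.utils import join_iters

print(list(join_iters([[1, 2], (3,), range(4, 6)])))  # -> [1, 2, 3, 4, 5]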
lm_eval/utils_stream.py
@@ -5,11 +5,13 @@ from tqdm import tqdm
 import json
 class ExitCodeError(Exception): pass
 def sh(x):
     if os.system(x): raise ExitCodeError()
 def ls(x):
     return [x + '/' + fn for fn in os.listdir(x)]
@@ -64,7 +66,8 @@ class join:
         self.sep = sep
     def __rrshift__(self, other):
         if other is None: return
         try:
             return self.sep.join(other)
         except:
@@ -156,4 +159,4 @@ def comp(*fs):
     return _f
-X = Reflective()
\ No newline at end of file
+X = Reflective()
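The __rrshift__ method in the second hunk is what makes the pipeline style in utils_stream.py work: writing data >> join(sep) calls join.__rrshift__(data), which returns sep.join(data) and simply returns None when the left-hand side is None. Below is a minimal self-contained sketch of that mechanism; the __init__ signature is an assumption, and the try/except fallback visible in the original hunk is omitted because its body is truncated in this view.

# Minimal reconstruction of the ">>" pipeline operator from the hunk above.
class join:
    def __init__(self, sep=" "):  # default separator assumed for illustration
        self.sep = sep

    def __rrshift__(self, other):
        # `other >> join(sep)` dispatches here because join is the right operand
        # and the left operand (e.g. a list) has no __rshift__ of its own.
        if other is None:
            return
        return self.sep.join(other)

print(["a", "b", "c"] >> join(", "))  # -> a, b, c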