Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
lm-evaluation-harness
Commits
20229183
"vscode:/vscode.git/clone" did not exist on "5e854f25098ca40a4ac80197607bf883feaeb375"
Commit
20229183
authored
Jun 04, 2021
by
Leo Gao
Browse files
convert all enabled tasks to use best_download for better download robustness
parent
85277915
Changes
8
Hide whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
40 additions
and
28 deletions
+40
-28
lm_eval/tasks/coqa.py
lm_eval/tasks/coqa.py
+4
-4
lm_eval/tasks/hendrycks_ethics.py
lm_eval/tasks/hendrycks_ethics.py
+8
-6
lm_eval/tasks/hendrycks_math.py
lm_eval/tasks/hendrycks_math.py
+5
-3
lm_eval/tasks/hendrycks_test.py
lm_eval/tasks/hendrycks_test.py
+9
-7
lm_eval/tasks/lambada.py
lm_eval/tasks/lambada.py
+10
-6
lm_eval/tasks/pile.py
lm_eval/tasks/pile.py
+2
-2
lm_eval/tasks/quac.py
lm_eval/tasks/quac.py
+1
-0
lm_eval/tasks/triviaqa.py
lm_eval/tasks/triviaqa.py
+1
-0
No files found.
lm_eval/tasks/coqa.py
View file @
20229183
...
@@ -4,6 +4,7 @@ import transformers.data.metrics.squad_metrics as squad_metrics
...
@@ -4,6 +4,7 @@ import transformers.data.metrics.squad_metrics as squad_metrics
from
lm_eval.base
import
Task
,
rf
,
mean
from
lm_eval.base
import
Task
,
rf
,
mean
from
..utils
import
sh
from
..utils
import
sh
from
itertools
import
zip_longest
from
itertools
import
zip_longest
from
best_download
import
download_file
class
CoQA
(
Task
):
class
CoQA
(
Task
):
...
@@ -14,10 +15,9 @@ class CoQA(Task):
...
@@ -14,10 +15,9 @@ class CoQA(Task):
coqa_dev_filepath
=
'data/coqa/coqa-dev-v1.0.json'
coqa_dev_filepath
=
'data/coqa/coqa-dev-v1.0.json'
sh
(
"""mkdir -p data/coqa"""
)
sh
(
"""mkdir -p data/coqa"""
)
if
not
os
.
path
.
exists
(
coqa_train_filepath
):
sh
(
"""wget http://downloads.cs.stanford.edu/nlp/data/coqa/coqa-train-v1.0.json -O """
+
coqa_train_filepath
)
download_file
(
"http://downloads.cs.stanford.edu/nlp/data/coqa/coqa-train-v1.0.json"
,
coqa_train_filepath
,
"b0fdb2bc1bd38dd3ca2ce5fa2ac3e02c6288ac914f241ac409a655ffb6619fa6"
)
if
not
os
.
path
.
exists
(
coqa_dev_filepath
):
download_file
(
"http://downloads.cs.stanford.edu/nlp/data/coqa/coqa-dev-v1.0.json"
,
coqa_dev_filepath
,
"dfa367a9733ce53222918d0231d9b3bedc2b8ee831a2845f62dfc70701f2540a"
)
sh
(
"""wget http://downloads.cs.stanford.edu/nlp/data/coqa/coqa-dev-v1.0.json -O """
+
coqa_dev_filepath
)
def
has_training_docs
(
self
):
def
has_training_docs
(
self
):
return
True
return
True
...
...
lm_eval/tasks/hendrycks_ethics.py
View file @
20229183
...
@@ -7,6 +7,7 @@ from lm_eval.base import Task, rf
...
@@ -7,6 +7,7 @@ from lm_eval.base import Task, rf
from
lm_eval.metrics
import
mean
from
lm_eval.metrics
import
mean
from
lm_eval.utils
import
sh
from
lm_eval.utils
import
sh
from
.common
import
yesno
from
.common
import
yesno
from
best_download
import
download_file
"""
"""
NOTE: The reported "group" accuracies for the Deontology, Justice, and Virtue
NOTE: The reported "group" accuracies for the Deontology, Justice, and Virtue
...
@@ -17,13 +18,14 @@ of the paper.
...
@@ -17,13 +18,14 @@ of the paper.
class
Ethics
(
Task
):
class
Ethics
(
Task
):
def
download
(
self
):
def
download
(
self
):
if
not
os
.
path
.
exists
(
'data/ethics'
):
if
not
os
.
path
.
exists
(
'data/ethics/done'
):
sh
(
"mkdir -p data"
)
download_file
(
"https://people.eecs.berkeley.edu/~hendrycks/ethics.tar"
,
"data/ethics.tar"
,
"40acbf1ac0da79a2aabef394d58889136b8d38b05be09482006de2453fb06333"
)
sh
(
"""
sh
(
"""
mkdir -p data
tar -xf data/ethics.tar -C data/
wget https://people.eecs.berkeley.edu/~hendrycks/ethics.tar -P data/
rm data/ethics.tar
tar -xf data/ethics.tar -C data/
touch data/ethics/done
rm data/ethics.tar
"""
)
"""
)
def
has_training_docs
(
self
):
def
has_training_docs
(
self
):
return
True
return
True
...
...
lm_eval/tasks/hendrycks_math.py
View file @
20229183
...
@@ -4,6 +4,7 @@ from lm_eval.utils import sh
...
@@ -4,6 +4,7 @@ from lm_eval.utils import sh
from
lm_eval.metrics
import
mean
from
lm_eval.metrics
import
mean
from
lm_eval.base
import
Task
,
rf
from
lm_eval.base
import
Task
,
rf
from
pathlib
import
Path
from
pathlib
import
Path
from
best_download
import
download_file
class
Math
(
Task
):
class
Math
(
Task
):
...
@@ -15,12 +16,13 @@ class Math(Task):
...
@@ -15,12 +16,13 @@ class Math(Task):
DATASET_PATH
=
Path
(
'data/MATH'
)
DATASET_PATH
=
Path
(
'data/MATH'
)
def
download
(
self
):
def
download
(
self
):
if
not
self
.
DATASET_PATH
.
exists
():
if
not
(
self
.
DATASET_PATH
/
'done'
).
exists
():
sh
(
f
"mkdir -p
{
self
.
DATASET_PATH
}
"
)
download_file
(
"https://people.eecs.berkeley.edu/~hendrycks/MATH.tar.gz"
,
f
"
{
self
.
DATASET_PATH
}
.tar.gz"
,
"e867c0df3e45e5f8219296d12dd4579d91fe313de8dc020ed17382e84f64c2f7"
)
sh
(
f
"""
sh
(
f
"""
mkdir -p
{
self
.
DATASET_PATH
}
wget https://people.eecs.berkeley.edu/~hendrycks/MATH.tar.gz -P data/
tar -xf
{
self
.
DATASET_PATH
}
.tar.gz -C data/
tar -xf
{
self
.
DATASET_PATH
}
.tar.gz -C data/
rm
{
self
.
DATASET_PATH
}
.tar.gz
rm
{
self
.
DATASET_PATH
}
.tar.gz
touch
{
self
.
DATASET_PATH
/
'done'
}
"""
)
"""
)
@
abc
.
abstractmethod
@
abc
.
abstractmethod
...
...
lm_eval/tasks/hendrycks_test.py
View file @
20229183
...
@@ -3,6 +3,7 @@ import random
...
@@ -3,6 +3,7 @@ import random
from
lm_eval.base
import
MultipleChoiceTask
from
lm_eval.base
import
MultipleChoiceTask
from
..utils
import
sh
from
..utils
import
sh
from
pathlib
import
Path
from
pathlib
import
Path
from
best_download
import
download_file
SUBJECTS
=
[
'abstract_algebra'
,
'anatomy'
,
'astronomy'
,
'business_ethics'
,
'clinical_knowledge'
,
'college_biology'
,
SUBJECTS
=
[
'abstract_algebra'
,
'anatomy'
,
'astronomy'
,
'business_ethics'
,
'clinical_knowledge'
,
'college_biology'
,
'college_chemistry'
,
'college_computer_science'
,
'college_mathematics'
,
'college_medicine'
,
'college_physics'
,
'college_chemistry'
,
'college_computer_science'
,
'college_mathematics'
,
'college_medicine'
,
'college_physics'
,
...
@@ -42,14 +43,15 @@ class GeneralHendrycksTest(MultipleChoiceTask):
...
@@ -42,14 +43,15 @@ class GeneralHendrycksTest(MultipleChoiceTask):
super
().
__init__
()
super
().
__init__
()
def
download
(
self
):
def
download
(
self
):
if
not
self
.
DATASET_PATH
.
exists
():
if
not
(
self
.
DATASET_PATH
/
'done'
).
exists
():
sh
(
"mkdir -p data"
)
download_file
(
"https://people.eecs.berkeley.edu/~hendrycks/data.tar"
,
"data/data.tar"
,
"78a804365a59028188fb19bd1adcadc5e0c260b220a9d8b2e33a5ea7d5fbe3b4"
)
sh
(
"""
sh
(
"""
mkdir -p data
tar -xf data/data.tar -C data/
wget -c https://people.eecs.berkeley.edu/~hendrycks/data.tar -P data/
rm data/data.tar
tar -xf data/data.tar -C data/
mv data/data data/hendrycksTest
rm data/data.tar
touch data/hendrycksTest/done
mv data/data data/hendrycksTest
"""
)
"""
)
def
has_training_docs
(
self
):
def
has_training_docs
(
self
):
return
True
return
True
...
...
lm_eval/tasks/lambada.py
View file @
20229183
...
@@ -9,12 +9,16 @@ class LAMBADA(Task):
...
@@ -9,12 +9,16 @@ class LAMBADA(Task):
VERSION
=
0
VERSION
=
0
def
download
(
self
):
def
download
(
self
):
sh
(
"mkdir -p data/lambada"
)
sh
(
"mkdir -p data/lambada"
)
sh
(
"wget http://eaidata.bmk.sh/data/lambada_test.jsonl -O data/lambada/lambada_test.jsonl"
)
try
:
# download_file(
download_file
(
# "http://eaidata.bmk.sh/data/lambada_test.jsonl",
"http://eaidata.bmk.sh/data/lambada_test.jsonl"
,
# "data/lambada/lambada_test.jsonl",
"data/lambada/lambada_test.jsonl"
,
# "4aa8d02cd17c719165fc8a7887fddd641f43fcafa4b1c806ca8abc31fabdb226"
"4aa8d02cd17c719165fc8a7887fddd641f43fcafa4b1c806ca8abc31fabdb226"
# )
)
except
:
# fallback - for some reason best_download doesnt work all the time here
sh
(
"wget http://eaidata.bmk.sh/data/lambada_test.jsonl -O data/lambada/lambada_test.jsonl"
)
sh
(
'echo "4aa8d02cd17c719165fc8a7887fddd641f43fcafa4b1c806ca8abc31fabdb226 data/lambada/lambada_test.jsonl" | sha256sum --check'
)
def
has_training_docs
(
self
):
def
has_training_docs
(
self
):
return
False
return
False
...
...
lm_eval/tasks/pile.py
View file @
20229183
...
@@ -19,8 +19,8 @@ class PilePerplexityTask(PerplexityTask, abc.ABC):
...
@@ -19,8 +19,8 @@ class PilePerplexityTask(PerplexityTask, abc.ABC):
def
download
(
self
):
def
download
(
self
):
# TODO: separate pile val/test out by component so we don't have to scan the entire file once per set
# TODO: separate pile val/test out by component so we don't have to scan the entire file once per set
os
.
makedirs
(
"data/pile/"
,
exist_ok
=
True
)
os
.
makedirs
(
"data/pile/"
,
exist_ok
=
True
)
download_file
(
"https://the-eye.eu/public/AI/pile/val.jsonl.zst"
,
self
.
VAL_PATH
)
download_file
(
"https://the-eye.eu/public/AI/pile/val.jsonl.zst"
,
self
.
VAL_PATH
,
"264c875d8bbd355d8daa9d032b75fd8fb91606218bb84dd1155b203fcd5fab92"
)
download_file
(
"https://the-eye.eu/public/AI/pile/test.jsonl.zst"
,
self
.
TEST_PATH
)
download_file
(
"https://the-eye.eu/public/AI/pile/test.jsonl.zst"
,
self
.
TEST_PATH
,
"0bb28c52d0b5596d389bf179ce2d43bf7f7ffae76b0d2d20b180c97f62e0975e"
)
def
validation_docs
(
self
):
def
validation_docs
(
self
):
rdr
=
lm_dataformat
.
Reader
(
self
.
VAL_PATH
)
rdr
=
lm_dataformat
.
Reader
(
self
.
VAL_PATH
)
...
...
lm_eval/tasks/quac.py
View file @
20229183
...
@@ -24,6 +24,7 @@ class QuAC(Task):
...
@@ -24,6 +24,7 @@ class QuAC(Task):
def
download
(
self
):
def
download
(
self
):
if
not
os
.
path
.
exists
(
'data/quac'
):
if
not
os
.
path
.
exists
(
'data/quac'
):
# TODO: convert to use best_download
sh
(
"""
sh
(
"""
mkdir -p data/quac
mkdir -p data/quac
wget https://s3.amazonaws.com/my89public/quac/train_v0.2.json -O data/quac/train_v0.2.json
wget https://s3.amazonaws.com/my89public/quac/train_v0.2.json -O data/quac/train_v0.2.json
...
...
lm_eval/tasks/triviaqa.py
View file @
20229183
...
@@ -9,6 +9,7 @@ class TriviaQA(Task):
...
@@ -9,6 +9,7 @@ class TriviaQA(Task):
VERSION
=
0
VERSION
=
0
def
download
(
self
):
def
download
(
self
):
if
not
os
.
path
.
exists
(
'data/triviaqa'
):
if
not
os
.
path
.
exists
(
'data/triviaqa'
):
# TODO: convert to best_download
sh
(
"""
sh
(
"""
mkdir -p data/triviaqa
mkdir -p data/triviaqa
wget http://nlp.cs.washington.edu/triviaqa/data/triviaqa-unfiltered.tar.gz -O data/triviaqa/trivia_qa-unfiltered.tar.gz
wget http://nlp.cs.washington.edu/triviaqa/data/triviaqa-unfiltered.tar.gz -O data/triviaqa/trivia_qa-unfiltered.tar.gz
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment