Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
lm-evaluation-harness
Commits
c65412e5
Commit
c65412e5
authored
Jan 08, 2022
by
thomasw21
Browse files
Actually, it shouldn't be hard to fix it to be compatible with future versions
parent
76dc6093
Changes
18
Hide whitespace changes
Inline
Side-by-side
Showing
18 changed files
with
26 additions
and
26 deletions
+26
-26
lm_eval/tasks/arithmetic.py
lm_eval/tasks/arithmetic.py
+1
-1
lm_eval/tasks/coqa.py
lm_eval/tasks/coqa.py
+2
-2
lm_eval/tasks/drop.py
lm_eval/tasks/drop.py
+1
-1
lm_eval/tasks/hendrycks_ethics.py
lm_eval/tasks/hendrycks_ethics.py
+1
-1
lm_eval/tasks/hendrycks_math.py
lm_eval/tasks/hendrycks_math.py
+1
-1
lm_eval/tasks/hendrycks_test.py
lm_eval/tasks/hendrycks_test.py
+1
-1
lm_eval/tasks/lambada.py
lm_eval/tasks/lambada.py
+2
-2
lm_eval/tasks/lambada_multilingual.py
lm_eval/tasks/lambada_multilingual.py
+2
-2
lm_eval/tasks/logiqa.py
lm_eval/tasks/logiqa.py
+1
-1
lm_eval/tasks/mutual.py
lm_eval/tasks/mutual.py
+2
-2
lm_eval/tasks/pile.py
lm_eval/tasks/pile.py
+2
-2
lm_eval/tasks/qa4mre.py
lm_eval/tasks/qa4mre.py
+2
-2
lm_eval/tasks/sciq.py
lm_eval/tasks/sciq.py
+2
-2
lm_eval/tasks/triviaqa.py
lm_eval/tasks/triviaqa.py
+1
-1
lm_eval/tasks/truthfulqa.py
lm_eval/tasks/truthfulqa.py
+2
-2
lm_eval/tasks/unscramble.py
lm_eval/tasks/unscramble.py
+1
-1
lm_eval/tasks/wikitext.py
lm_eval/tasks/wikitext.py
+1
-1
setup.py
setup.py
+1
-1
No files found.
lm_eval/tasks/arithmetic.py
View file @
c65412e5
...
...
@@ -21,7 +21,7 @@ class Arithmetic(Task):
url
=
'https://raw.githubusercontent.com/openai/gpt-3/master/data/'
+
file_name
if
not
os
.
path
.
exists
(
self
.
directory
):
os
.
makedirs
(
self
.
directory
)
download_file
(
url
,
self
.
directory
+
file_name
,
checksum
)
download_file
(
url
,
local_file
=
self
.
directory
+
file_name
,
expected_checksum
=
checksum
)
self
.
set_docs
()
@
abc
.
abstractmethod
...
...
lm_eval/tasks/coqa.py
View file @
c65412e5
...
...
@@ -16,8 +16,8 @@ class CoQA(Task):
sh
(
"""mkdir -p data/coqa"""
)
download_file
(
"http://downloads.cs.stanford.edu/nlp/data/coqa/coqa-train-v1.0.json"
,
coqa_train_filepath
,
"b0fdb2bc1bd38dd3ca2ce5fa2ac3e02c6288ac914f241ac409a655ffb6619fa6"
)
download_file
(
"http://downloads.cs.stanford.edu/nlp/data/coqa/coqa-dev-v1.0.json"
,
coqa_dev_filepath
,
"dfa367a9733ce53222918d0231d9b3bedc2b8ee831a2845f62dfc70701f2540a"
)
download_file
(
"http://downloads.cs.stanford.edu/nlp/data/coqa/coqa-train-v1.0.json"
,
local_file
=
coqa_train_filepath
,
expected_checksum
=
"b0fdb2bc1bd38dd3ca2ce5fa2ac3e02c6288ac914f241ac409a655ffb6619fa6"
)
download_file
(
"http://downloads.cs.stanford.edu/nlp/data/coqa/coqa-dev-v1.0.json"
,
local_file
=
coqa_dev_filepath
,
expected_checksum
=
"dfa367a9733ce53222918d0231d9b3bedc2b8ee831a2845f62dfc70701f2540a"
)
def
has_training_docs
(
self
):
return
True
...
...
lm_eval/tasks/drop.py
View file @
c65412e5
...
...
@@ -27,7 +27,7 @@ class DROP(Task):
url
=
"https://s3-us-west-2.amazonaws.com/allennlp/datasets/drop/drop_dataset.zip"
checksum
=
"39d2278a29fd729de301b111a45f434c24834f40df8f4ff116d864589e3249d6"
zip_path
=
self
.
DATASET_PATH
/
"drop_dataset.zip"
download_file
(
url
,
str
(
zip_path
),
checksum
)
download_file
(
url
,
local_file
=
str
(
zip_path
),
expected_checksum
=
checksum
)
with
ZipFile
(
zip_path
,
"r"
)
as
zip
:
zip
.
extractall
(
self
.
DATASET_PATH
)
...
...
lm_eval/tasks/hendrycks_ethics.py
View file @
c65412e5
...
...
@@ -20,7 +20,7 @@ class Ethics(Task):
def
download
(
self
):
if
not
os
.
path
.
exists
(
'data/ethics/done'
):
sh
(
"mkdir -p data"
)
download_file
(
"https://people.eecs.berkeley.edu/~hendrycks/ethics.tar"
,
"data/ethics.tar"
,
"40acbf1ac0da79a2aabef394d58889136b8d38b05be09482006de2453fb06333"
)
download_file
(
"https://people.eecs.berkeley.edu/~hendrycks/ethics.tar"
,
local_file
=
"data/ethics.tar"
,
expected_checksum
=
"40acbf1ac0da79a2aabef394d58889136b8d38b05be09482006de2453fb06333"
)
sh
(
"""
tar -xf data/ethics.tar -C data/
rm data/ethics.tar
...
...
lm_eval/tasks/hendrycks_math.py
View file @
c65412e5
...
...
@@ -18,7 +18,7 @@ class Math(Task):
def
download
(
self
):
if
not
(
self
.
DATASET_PATH
/
'test'
).
exists
()
or
not
(
self
.
DATASET_PATH
/
'done'
).
exists
():
sh
(
f
"mkdir -p
{
self
.
DATASET_PATH
}
"
)
download_file
(
"https://people.eecs.berkeley.edu/~hendrycks/MATH.tar"
,
f
"
{
self
.
DATASET_PATH
}
.tar"
,
"01256fd7cd5430596fdf07e6e6a5827111b5235b7ffed679c662a12f898932da"
)
download_file
(
"https://people.eecs.berkeley.edu/~hendrycks/MATH.tar"
,
local_file
=
f
"
{
self
.
DATASET_PATH
}
.tar"
,
expected_checksum
=
"01256fd7cd5430596fdf07e6e6a5827111b5235b7ffed679c662a12f898932da"
)
sh
(
f
"""
tar -xf
{
self
.
DATASET_PATH
}
.tar -C data/ && touch
{
self
.
DATASET_PATH
/
'done'
}
rm
{
self
.
DATASET_PATH
}
.tar
...
...
lm_eval/tasks/hendrycks_test.py
View file @
c65412e5
...
...
@@ -45,7 +45,7 @@ class GeneralHendrycksTest(MultipleChoiceTask):
def
download
(
self
):
if
not
(
self
.
DATASET_PATH
/
'done'
).
exists
():
sh
(
"mkdir -p data"
)
download_file
(
"https://people.eecs.berkeley.edu/~hendrycks/data.tar"
,
"data/data.tar"
,
"78a804365a59028188fb19bd1adcadc5e0c260b220a9d8b2e33a5ea7d5fbe3b4"
)
download_file
(
"https://people.eecs.berkeley.edu/~hendrycks/data.tar"
,
local_file
=
"data/data.tar"
,
expected_checksum
=
"78a804365a59028188fb19bd1adcadc5e0c260b220a9d8b2e33a5ea7d5fbe3b4"
)
sh
(
"""
tar -xf data/data.tar -C data/
rm data/data.tar
...
...
lm_eval/tasks/lambada.py
View file @
c65412e5
...
...
@@ -14,8 +14,8 @@ class LAMBADA(Task):
if
not
os
.
path
.
exists
(
"data/lambada/lambada_test.jsonl"
):
download_file
(
"http://eaidata.bmk.sh/data/lambada_test.jsonl"
,
"data/lambada/lambada_test.jsonl"
,
"4aa8d02cd17c719165fc8a7887fddd641f43fcafa4b1c806ca8abc31fabdb226"
local_file
=
"data/lambada/lambada_test.jsonl"
,
expected_checksum
=
"4aa8d02cd17c719165fc8a7887fddd641f43fcafa4b1c806ca8abc31fabdb226"
)
except
:
# fallback - for some reason best_download doesn't work all the time here
...
...
lm_eval/tasks/lambada_multilingual.py
View file @
c65412e5
...
...
@@ -32,8 +32,8 @@ class MultilingualLAMBADA(lambada.LAMBADA):
if
not
os
.
path
.
exists
(
f
):
download_file
(
url
,
f
,
CHECKSUMS
[
self
.
LANG
]
local_file
=
f
,
expected_checksum
=
CHECKSUMS
[
self
.
LANG
]
)
except
:
# fallback - for some reason best_download doesn't work all the time here
...
...
lm_eval/tasks/logiqa.py
View file @
c65412e5
...
...
@@ -19,7 +19,7 @@ class LogiQA(MultipleChoiceTask):
]
for
split
in
splits
:
file
=
self
.
DATASET_PATH
/
f
"
{
split
[
'name'
]
}
.txt"
download_file
(
f
"
{
base_url
}
/
{
split
[
'name'
]
}
.txt"
,
str
(
file
),
split
[
"checksum"
])
download_file
(
f
"
{
base_url
}
/
{
split
[
'name'
]
}
.txt"
,
local_file
=
str
(
file
),
expected_checksum
=
split
[
"checksum"
])
def
has_training_docs
(
self
):
return
True
...
...
lm_eval/tasks/mutual.py
View file @
c65412e5
...
...
@@ -36,8 +36,8 @@ class MuTualBase(Task):
master_zip
=
Path
(
"data/master.zip"
)
download_file
(
"https://github.com/Nealcly/MuTual/archive/master.zip"
,
str
(
master_zip
),
"bb325cf6c672f0f02699993a37138b0fa0af6fcfc77ec81dfbe46add4d7b29f9"
)
local_file
=
str
(
master_zip
),
expected_checksum
=
"bb325cf6c672f0f02699993a37138b0fa0af6fcfc77ec81dfbe46add4d7b29f9"
)
with
zipfile
.
ZipFile
(
master_zip
,
'r'
)
as
zip
:
zip
.
extractall
(
"data"
)
Path
(
"data/MuTual-master/data"
).
rename
(
str
(
self
.
BASE_PATH
))
...
...
lm_eval/tasks/pile.py
View file @
c65412e5
...
...
@@ -19,8 +19,8 @@ class PilePerplexityTask(PerplexityTask, abc.ABC):
def
download
(
self
):
# TODO: separate pile val/test out by component so we don't have to scan the entire file once per set
os
.
makedirs
(
"data/pile/"
,
exist_ok
=
True
)
download_file
(
"https://the-eye.eu/public/AI/pile/val.jsonl.zst"
,
self
.
VAL_PATH
,
"264c875d8bbd355d8daa9d032b75fd8fb91606218bb84dd1155b203fcd5fab92"
)
download_file
(
"https://the-eye.eu/public/AI/pile/test.jsonl.zst"
,
self
.
TEST_PATH
,
"0bb28c52d0b5596d389bf179ce2d43bf7f7ffae76b0d2d20b180c97f62e0975e"
)
download_file
(
"https://the-eye.eu/public/AI/pile/val.jsonl.zst"
,
local_file
=
self
.
VAL_PATH
,
expected_checksum
=
"264c875d8bbd355d8daa9d032b75fd8fb91606218bb84dd1155b203fcd5fab92"
)
download_file
(
"https://the-eye.eu/public/AI/pile/test.jsonl.zst"
,
local_file
=
self
.
TEST_PATH
,
expected_checksum
=
"0bb28c52d0b5596d389bf179ce2d43bf7f7ffae76b0d2d20b180c97f62e0975e"
)
def
validation_docs
(
self
):
rdr
=
lm_dataformat
.
Reader
(
self
.
VAL_PATH
)
...
...
lm_eval/tasks/qa4mre.py
View file @
c65412e5
...
...
@@ -32,8 +32,8 @@ class QA4MRE(MultipleChoiceTask):
if
not
os
.
path
.
isfile
(
f
"data/qa4mre/QA4MRE-
{
year
}
-
{
lang
}
"
):
download_file
(
url_path
,
f
"data/qa4mre/QA4MRE-
{
year
}
-
{
lang
}
_GS.xml"
,
sha256sums
[
year
],
local_file
=
f
"data/qa4mre/QA4MRE-
{
year
}
-
{
lang
}
_GS.xml"
,
expected_checksum
=
sha256sums
[
year
],
)
def
has_training_docs
(
self
):
...
...
lm_eval/tasks/sciq.py
View file @
c65412e5
...
...
@@ -13,8 +13,8 @@ class SciQ(MultipleChoiceTask):
os
.
makedirs
(
'data/sciq'
,
exist_ok
=
True
)
download_file
(
'https://ai2-public-datasets.s3.amazonaws.com/sciq/SciQ.zip'
,
'data/sciq/SciQ.zip'
,
'7f3312f6ac6b09970b32942d106a8c44ec0dad46a0369f17d635aff8e348a87c'
,
local_file
=
'data/sciq/SciQ.zip'
,
expected_checksum
=
'7f3312f6ac6b09970b32942d106a8c44ec0dad46a0369f17d635aff8e348a87c'
,
)
with
zipfile
.
ZipFile
(
"data/sciq/SciQ.zip"
,
"r"
)
as
zf
:
zf
.
extractall
(
"data/sciq/"
)
...
...
lm_eval/tasks/triviaqa.py
View file @
c65412e5
...
...
@@ -12,7 +12,7 @@ class TriviaQA(Task):
def
download
(
self
):
if
not
os
.
path
.
exists
(
'data/triviaqa/unfiltered-web-train.jsonl'
):
os
.
makedirs
(
"data/triviaqa/"
,
exist_ok
=
True
)
download_file
(
"http://eaidata.bmk.sh/data/triviaqa-unfiltered.tar.gz"
,
"data/triviaqa/triviaqa-unfiltered.tar.gz"
,
"adc19b42769062d241a8fbe834c56e58598d9322eb6c614e9f33a68a2cf5523e"
)
download_file
(
"http://eaidata.bmk.sh/data/triviaqa-unfiltered.tar.gz"
,
local_file
=
"data/triviaqa/triviaqa-unfiltered.tar.gz"
,
expected_checksum
=
"adc19b42769062d241a8fbe834c56e58598d9322eb6c614e9f33a68a2cf5523e"
)
sh
(
"""
cd data/triviaqa/
tar -xf triviaqa-unfiltered.tar.gz
...
...
lm_eval/tasks/truthfulqa.py
View file @
c65412e5
...
...
@@ -58,7 +58,7 @@ class TruthfulQAMultipleChoice(Task):
Path
.
mkdir
(
self
.
DATASET_PATH
,
parents
=
True
)
mc_url
=
"https://raw.githubusercontent.com/sylinrl/TruthfulQA/013686a06be7a7bde5bf8223943e106c7250123c/data/mc_task.json"
checksum
=
"6eb4125d25750c0145c4be2dce00440736684ab6f74ce6bff2139571cc758954"
download_file
(
mc_url
,
str
(
self
.
DATASET_PATH
/
"mc_task.json"
),
checksum
)
download_file
(
mc_url
,
local_file
=
str
(
self
.
DATASET_PATH
/
"mc_task.json"
),
expected_checksum
=
checksum
)
def
has_training_docs
(
self
):
return
False
...
...
@@ -163,7 +163,7 @@ class TruthfulQAGeneration(Task):
Path
.
mkdir
(
self
.
DATASET_PATH
,
parents
=
True
)
url
=
"https://raw.githubusercontent.com/sylinrl/TruthfulQA/013686a06be7a7bde5bf8223943e106c7250123c/TruthfulQA.csv"
checksum
=
"8d7dd15f033196140f032d97d30f037da7a7b1192c3f36f9937c1850925335a2"
download_file
(
url
,
str
(
self
.
DATASET_PATH
/
"TruthfulQA.csv"
),
checksum
)
download_file
(
url
,
local_file
=
str
(
self
.
DATASET_PATH
/
"TruthfulQA.csv"
),
expected_checksum
=
checksum
)
def
has_training_docs
(
self
):
return
False
...
...
lm_eval/tasks/unscramble.py
View file @
c65412e5
...
...
@@ -29,7 +29,7 @@ class WordUnscrambleTask(Task):
if
not
file
.
exists
():
rawfile
=
file
.
parent
/
(
file
.
name
+
".gz"
)
base_url
=
"https://raw.githubusercontent.com/openai/gpt-3/master/data"
download_file
(
f
"
{
base_url
}
/
{
self
.
FILENAME
}
.gz"
,
str
(
rawfile
),
self
.
CHECKSUM
)
download_file
(
f
"
{
base_url
}
/
{
self
.
FILENAME
}
.gz"
,
local_file
=
str
(
rawfile
),
expected_checksum
=
self
.
CHECKSUM
)
extract_gzip
(
gz
=
rawfile
,
to
=
file
)
def
has_training_docs
(
self
):
...
...
lm_eval/tasks/wikitext.py
View file @
c65412e5
...
...
@@ -46,7 +46,7 @@ class WikiText(PerplexityTask):
def
download
(
self
):
if
not
os
.
path
.
exists
(
'data/wikitext/wikitext-2-raw/wiki.valid.raw'
):
os
.
makedirs
(
"data/wikitext/"
,
exist_ok
=
True
)
download_file
(
"https://s3.amazonaws.com/research.metamind.io/wikitext/wikitext-2-raw-v1.zip"
,
"data/wikitext/wikitext-2-raw-v1.zip"
,
"ef7edb566e3e2b2d31b29c1fdb0c89a4cc683597484c3dc2517919c615435a11"
)
download_file
(
"https://s3.amazonaws.com/research.metamind.io/wikitext/wikitext-2-raw-v1.zip"
,
local_file
=
"data/wikitext/wikitext-2-raw-v1.zip"
,
expected_checksum
=
"ef7edb566e3e2b2d31b29c1fdb0c89a4cc683597484c3dc2517919c615435a11"
)
sh
(
"cd data/wikitext/ && unzip wikitext-2-raw-v1.zip"
)
def
fewshot_description
(
self
):
...
...
setup.py
View file @
c65412e5
...
...
@@ -21,7 +21,7 @@ setuptools.setup(
python_requires
=
'>=3.6'
,
install_requires
=
[
"black"
,
"best_download>=0.0.6
,<0.0.8
"
,
"best_download>=0.0.6"
,
"datasets==1.15.1"
,
"click>=7.1"
,
"scikit-learn>=0.24.1"
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment