gaoqiong / lm-evaluation-harness · Commits

Commit 4b133dca
Authored Mar 28, 2021 by Leo Gao

    Merge branch 'master' of github.com:EleutherAI/lm_evaluation_harness into cfsquad

    # Conflicts:
    #   lm_eval/tasks/squad.py

Parents: 8de85534, caba51e1
Changes: 41 files in the commit. This page shows 20 changed files with 81 additions and 94 deletions (+81 -94).
lm_eval/tasks/pubmedqa.py                     +1   -9
lm_eval/tasks/qa4mre.py                       +3   -11
lm_eval/tasks/quac.py                         +0   -1
lm_eval/tasks/race.py                         +0   -5
lm_eval/tasks/sat.py                          +1   -8
lm_eval/tasks/sciq.py                         +1   -3
lm_eval/tasks/squad.py                        +0   -1
lm_eval/tasks/storycloze.py                   +2   -4
lm_eval/tasks/superglue.py                    +8   -8
lm_eval/tasks/translation.py                  +6   -11
lm_eval/tasks/triviaqa.py                     +1   -1
lm_eval/tasks/unscramble.py                   +0   -1
lm_eval/tasks/wikitext.py                     +4   -7
lm_eval/tasks/wsc273.py                       +6   -2
lm_eval/utils.py                              +40  -1
main.py                                       +1   -3
scripts/cost_estimate.py                      +2   -10
scripts/fewshot_description_experiment.py     +0   -4
scripts/write_out.py                          +4   -3
tests/test_models.py                          +1   -1
lm_eval/tasks/pubmedqa.py

 import numpy as np
-import json
-import random
 from .common import HFTask
 from lm_eval.base import rf
 from ..metrics import mean
...
@@ -40,12 +38,6 @@ class Pubmed_QA(HFTask):
     def doc_to_target(self, doc):
         return " {}".format(doc["final_decision"])

-    def fewshot_examples(self, k):
-        # Since only test docs sample from test docs
-        if self._training_docs is None:
-            self._training_docs = list(self.test_docs())
-        return random.sample(self._training_docs, k)
-
     def construct_requests(self, doc, ctx):
         """ Uses RequestFactory to construct Requests and returns
         an iterable of Requests which will be sent to the LM.
...
lm_eval/tasks/qa4mre.py

 import os
-import numpy as np
+from best_download import download_file
+from lm_eval.base import MultipleChoiceTask, rf
+from lm_eval.metrics import mean
 import xml.etree.ElementTree as ET
-import random
-from best_download import download_file
-from lm_eval.base import MultipleChoiceTask


 class QA4MRE(MultipleChoiceTask):
     YEAR = None
...
@@ -46,12 +44,6 @@ class QA4MRE(MultipleChoiceTask):
     def has_test_docs(self):
         return True

-    def fewshot_examples(self, k):
-        # Since only test docs sample from test docs
-        if self._training_docs is None:
-            self._training_docs = list(self.test_docs())
-        return random.sample(self._training_docs, k)
-
     def _convert_standard(self, question):
         choices = [i.text for i in question.iter('answer')]
         out_doc = {
...
lm_eval/tasks/quac.py

 import json
-import random
 import os
 from lm_eval.base import Task
 from ..utils import sh
...
lm_eval/tasks/race.py

...
@@ -5,11 +5,6 @@ from lm_eval.base import rf
 from ..metrics import mean
 from .common import HFTask
-import os
-from functools import reduce
-import operator
-from tqdm import tqdm
-import json


 class each:
     def __init__(self, f):
...
lm_eval/tasks/sat.py

-import json
-import random
 import os
-from lm_eval.base import MultipleChoiceTask, rf
+from lm_eval.base import MultipleChoiceTask
-from ..metrics import mean
-from tqdm import auto as tqdm_lib
-from .common import simple_accuracy_metric
-import numpy as np
-from ..utils import sh


 class SATAnalogies(MultipleChoiceTask):
...
lm_eval/tasks/sciq.py

 import os
 import json
-from ..utils import sh
-from lm_eval.base import MultipleChoiceTask, rf
-from ..metrics import mean
 import zipfile
+from lm_eval.base import MultipleChoiceTask
 from best_download import download_file
...
lm_eval/tasks/squad.py

...
@@ -17,7 +17,6 @@ def _squad_agg(key, items):
     return _squad_metric(predictions=predictions, references=references)[key]


 class SQuAD2(HFTask):
     DATASET_PATH = "squad_v2"
     DATASET_NAME = None
...
lm_eval/tasks/storycloze.py

-import json
-import random
-from lm_eval.base import Task
-from ..utils import sh
 import csv
+from lm_eval.base import Task


 class StoryCloze(Task):
     NEEDS_MANUAL_DL = True
...
lm_eval/tasks/superglue.py

...
@@ -4,11 +4,11 @@ To-do:
     - ReCoRD
 """
 import numpy as np
+import sklearn
+import transformers.data.metrics.squad_metrics as squad_metrics
 from .common import HFTask, yesno
 from lm_eval.base import rf
 from ..metrics import mean, acc_all, metric_max_over_ground_truths
-import sklearn
-import transformers.data.metrics.squad_metrics as squad_metrics
 from ..utils import general_detokenize
...
@@ -23,7 +23,7 @@ class BoolQ(HFTask):
         return True

     def has_test_docs(self):
-        return True
+        return False

     def fewshot_description(self):
         # TODO: figure out actual description
...
@@ -74,7 +74,7 @@ class CommitmentBank(HFTask):
         return True

     def has_test_docs(self):
-        return True
+        return False

     def fewshot_description(self):
         # TODO: figure out actual description
...
@@ -145,7 +145,7 @@ class Copa(HFTask):
         return True

     def has_test_docs(self):
-        return True
+        return False

     def fewshot_description(self):
         # TODO: figure out actual description
...
@@ -209,7 +209,7 @@ class MultiRC(HFTask):
         return True

     def has_test_docs(self):
-        return True
+        return False

     def fewshot_description(self):
         # TODO: figure out actual description
...
@@ -355,7 +355,7 @@ class WordsInContext(HFTask):
         return True

     def has_test_docs(self):
-        return True
+        return False

     def fewshot_description(self):
         # TODO: figure out actual description
...
@@ -412,7 +412,7 @@ class SGWinogradSchemaChallenge(HFTask):
         return True

     def has_test_docs(self):
-        return True
+        return False

     def training_docs(self):
         if self.has_training_docs():
...
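
The has_test_docs change reflects that the SuperGLUE test splits are unlabeled, so scoring has to fall back to the validation split. A minimal sketch of how a driver might consume these flags (a hypothetical helper for illustration, not the harness's actual evaluator code):

    # Hypothetical selection helper: prefer labeled test docs, otherwise fall
    # back to validation docs. With has_test_docs() now returning False for the
    # SuperGLUE tasks, they end up evaluated on their validation split.
    def pick_eval_docs(task):
        if task.has_test_docs():
            return task.test_docs()
        elif task.has_validation_docs():
            return task.validation_docs()
        raise RuntimeError("Task has neither test nor validation docs")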
lm_eval/tasks/translation.py

-import abc
-import json
-import random
-import os
-from collections import Iterable
-from pprint import pprint
 import pycountry
+from pprint import pprint
 from sacrebleu import sacrebleu
-import logging
 from lm_eval import metrics
 from lm_eval.base import Task, rf
...
@@ -86,11 +78,14 @@ class GeneralTranslationTask(Task):
         } for src, ref in zip(self.src_data, self.ref_data)]

     def doc_to_text(self, doc):
-        return doc["src"]
+        language_codes = self.sacrebleu_language_pair.split("-")
+        src_lang = code_to_language(language_codes[0])
+        tar_lang = code_to_language(language_codes[1])
+        return f"{src_lang} phrase: " + doc["src"] + f"\n{tar_lang} phrase:"

     def doc_to_target(self, doc):
         # This shows a single target, though there may be multiple targets in a lang test
-        return doc["ref"] if isinstance(doc["ref"], str) else doc["ref"][0]
+        return " " + doc["ref"] if isinstance(doc["ref"], str) else doc["ref"][0]

     def construct_requests(self, doc, ctx):
         """ Uses RequestFactory to construct Requests and returns an iterable of
...
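
The new doc_to_text prompt needs a human-readable name for each side of the sacrebleu language pair (for example "en-de"). The helper code_to_language is referenced but not shown in this hunk; a plausible sketch using the pycountry import above (an assumption for illustration, not necessarily the repository's implementation):

    import pycountry

    def code_to_language(code):
        # Map an ISO 639-1 code such as "de" to its English name, "German".
        return pycountry.languages.get(alpha_2=code).name

    # For the pair "en-de", doc_to_text would then produce a prompt like:
    #   "English phrase: <source sentence>\nGerman phrase:"
    print(code_to_language("de"))  # German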
lm_eval/tasks/triviaqa.py

 import os
 import json
-import random
 from lm_eval.base import Task, rf
 from ..metrics import mean
 from ..utils import sh


 class TriviaQA(Task):
     def download(self):
         if not os.path.exists('data/triviaqa'):
...
lm_eval/tasks/unscramble.py

 import gzip
 import json
-import random
 import shutil
 from pathlib import Path
 from best_download import download_file
...
lm_eval/tasks/wikitext.py

-import numpy as np
+from .common import HFTask
-from scipy.stats import pearsonr, spearmanr
-from sklearn.metrics import f1_score, matthews_corrcoef
-from tqdm import auto as tqdm_lib
-from .common import NLP_TASK, simple_accuracy_metric, yesno


-class WikiText103(NLP_TASK):
+class WikiText103(HFTask):
     NLP_PATH = "wikitext"
     NLP_NAME = "wikitext-103-raw-v1"
...
@@ -66,7 +63,7 @@ class WikiText103(NLP_TASK):
         raise NotImplementedError('Evaluation not implemented')


-class WikiText2(NLP_TASK):
+class WikiText2(HFTask):
     NLP_PATH = "wikitext"
     NLP_NAME = "wikitext-2-raw-v1"
...
lm_eval/tasks/wsc273.py

...
@@ -56,10 +56,14 @@ class WinogradSchemaChallenge273(HFTask):
         # TODO: redo description
         return "Winograd schema sentence with correct continuation. True. Winograd schema sentence with incorrect continuation. False."

-    def fewshot_examples(self, k):
+    def fewshot_examples(self, k, rnd):
         # NOTE: `super().fewshot_examples` samples from training docs which are
         # not available for this test-set-only dataset.
-        return random.sample(list(self.test_docs()), k)
+        if self._fewshot_docs is None:
+            self._fewshot_docs = list(self.test_docs())
+        return rnd.sample(list(self._fewshot_docs), k)

     def doc_to_text(self, doc):
         return self.partial_context(doc, doc["options"][doc["label"]])
...
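
With the new signature the few-shot draw no longer touches the global random module: the caller hands in a seeded random.Random, and the test docs are cached in _fewshot_docs so repeated calls sample from the same list. A small sketch of the calling convention (the task variable and call site are illustrative assumptions):

    import random

    # A seeded, task-local generator makes the sampled few-shot examples
    # reproducible and independent of any other use of the global `random` module.
    rnd = random.Random()
    rnd.seed(1234)

    # Hypothetical call site mirroring the new signature:
    # examples = task.fewshot_examples(k=5, rnd=rnd)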
lm_eval/utils.py

 import os
 import re
+import collections


 class ExitCodeError(Exception):
...
@@ -42,6 +43,14 @@ def chunks(iter, n):
     if arr: yield arr


+def group(arr, fn):
+    res = collections.defaultdict(list)
+
+    for ob in arr:
+        res[fn(ob)].append(ob)
+
+    return list(res.values())
+
 def general_detokenize(string):
     string = string.replace(" n't", "n't")
     string = string.replace(" )", ")")
...
@@ -50,3 +59,33 @@ def general_detokenize(string):
     string = string.replace(' "', '"')
     string = re.sub(r" (['.,])", r"\1", string)
     return string
+
+
+class Reorderer:
+    def __init__(self, arr, fn):
+        self.size = len(arr)
+        arr = list(enumerate(arr))
+        arr = group(arr, lambda x: fn(x[1]))
+        arr = [([y[0] for y in x], x[0][1]) for x in arr]
+        arr.sort(key=lambda x: fn(x[1]))
+
+        self.arr = arr
+
+    def get_reordered(self):
+        return [x[1] for x in self.arr]
+
+    def get_original(self, newarr):
+        res = [None] * self.size
+        cov = [False] * self.size
+
+        for (inds, _), v in zip(self.arr, newarr):
+            for ind in inds:
+                res[ind] = v
+                cov[ind] = True
+
+        assert all(cov)
+
+        return res
\ No newline at end of file
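
The new group helper buckets items by a key function, and Reorderer uses it to reorder a list by an arbitrary key, run something over the reordered items, and scatter the results back to their original positions; entries whose keys compare equal are collapsed to a single representative. A small usage sketch, assuming this version of lm_eval.utils is importable (the length-sorting use case, e.g. batching similar-length requests, is an assumption about intent):

    from lm_eval.utils import Reorderer

    # Reorder strings by length; the two "a" entries share a key and collapse
    # into one representative, so the reordered list has three items.
    reord = Reorderer(["bb", "a", "cccc", "a"], len)
    print(reord.get_reordered())          # ['a', 'bb', 'cccc']

    processed = [s.upper() for s in reord.get_reordered()]
    print(reord.get_original(processed))  # ['BB', 'A', 'CCCC', 'A']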
main.py

...
@@ -2,8 +2,6 @@ import argparse
 import json
 import numpy as np
 import random
-import itertools
-import collections
 import logging

 from lm_eval import models, tasks, evaluator, base
...
@@ -35,7 +33,7 @@ def main():
         print("WARNING: --limit SHOULD ONLY BE USED FOR TESTING. REAL METRICS SHOULD NOT BE COMPUTED USING LIMIT.")

     if not args.no_cache:
-        lm = base.CachingLM(lm, 'lm_cache/' + args.model + '_' + args.model_args.replace('=', '-').replace(',', '_') + '.db')
+        lm = base.CachingLM(lm, 'lm_cache/' + args.model + '_' + args.model_args.replace('=', '-').replace(',', '_').replace('/', '-') + '.db')

     if args.tasks == "all_tasks":
         task_names = tasks.ALL_TASKS
     else:
...
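
The added .replace('/', '-') keeps the cache filename valid when a model_args value contains a slash, e.g. a Hugging Face model identifier. A quick illustration with made-up argument values:

    model = "gpt2"
    model_args = "pretrained=EleutherAI/gpt-neo-1.3B"  # hypothetical value containing "/"

    cache_file = 'lm_cache/' + model + '_' + \
        model_args.replace('=', '-').replace(',', '_').replace('/', '-') + '.db'
    print(cache_file)  # lm_cache/gpt2_pretrained-EleutherAI-gpt-neo-1.3B.db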
scripts/cost_estimate.py

-import argparse
-import json
-import numpy as np
 import random
-import itertools
-import collections
-import logging
-from lm_eval import models, tasks, evaluator, base
-import random
-from lm_eval.base import LM
 import transformers
+from lm_eval import tasks, evaluator
+from lm_eval.base import LM


 class DryrunLM(LM):
...
scripts/fewshot_description_experiment.py

-import argparse
 import json
 import numpy as np
 import random
-import itertools
-import collections
 import logging

 from lm_eval import models, tasks, evaluator, base

 logging.getLogger("openai").setLevel(logging.WARNING)
...
scripts/write_out.py

...
@@ -2,7 +2,6 @@ import argparse
 import numpy as np
 import os
 import random
 from lm_eval import tasks
 from lm_eval.utils import join_iters
...
@@ -16,14 +15,13 @@ def parse_args():
     parser.add_argument('--provide_description', action="store_true")
     parser.add_argument('--sets', type=str, default="val")  # example: val,test
     parser.add_argument('--num_fewshot', type=int, default=1)
-    parser.add_argument('--seed', type=int, default=1234)
+    parser.add_argument('--seed', type=int, default=42)
     parser.add_argument('--num_examples', type=int, default=1)
     return parser.parse_args()


 def main():
     args = parse_args()
-    random.seed(args.seed)
     np.random.seed(args.seed)

     if args.tasks == "all_tasks":
...
@@ -33,6 +31,8 @@ def main():
     task_dict = tasks.get_task_dict(task_names)
     os.makedirs(args.output_base_path, exist_ok=True)
     for task_name, task in task_dict.items():
+        rnd = random.Random()
+        rnd.seed(args.seed)
         iters = []
...
@@ -54,6 +54,7 @@ def main():
                     doc=doc,
                     provide_description=args.provide_description,
                     num_fewshot=args.num_fewshot,
+                    rnd=rnd
                 )
                 f.write(ctx + "\n")
...
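
The script now seeds a fresh random.Random inside the per-task loop instead of seeding the global module once, so every task's few-shot contexts are drawn from an identical stream no matter how many tasks ran before it. A minimal sketch of the difference:

    import random

    # Global seeding: the value drawn "for task_b" depends on how much
    # randomness "task_a" consumed first.
    random.seed(42)
    task_a_draw = random.random()
    task_b_draw = random.random()   # differs from task_a_draw

    # Per-task seeding (the new pattern): each task gets its own generator,
    # so the draws are identical and independent of task order.
    draws = []
    for task_name in ["task_a", "task_b"]:
        rnd = random.Random()
        rnd.seed(42)
        draws.append(rnd.random())

    assert draws[0] == draws[1]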
tests/test_models.py

 import lm_eval.models as models
+import lm_eval.base as base


 def test_gpt2():
     gpt2 = models.get_model('gpt2').create_from_arg_string("device=cpu")
...